---
title: "Replication Code for Escaping the Disengagement Dilemma: Two Field Experiments on Motivating Citizens to Report on Public Services" 
author: "Mark Buntaine, Daniel Nielson, Jacob Skaggs"
output: 
  html_document:
    theme: united
    toc: true
---

Replication software:
R version 3.6.1 ("Action of the Toes") running on Mac OS X 10.14.6

## Preliminaries and Setup

```{r rmd-setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, fig.height = 9, fig.width = 9)
library(estimatr) #version 0.18.0
```

```{r ph1-setup, warning=FALSE}
library(ggmap) #version 3.0.0
library(rgdal) #version 1.4-4
library(rgeos) #version 0.5-1
library(maptools) #version 0.9-5
library(dplyr) #version 0.8.3
library(tidyr) #version 0.8.3
library(tmap) #version 2.3
library(ri) #version 0.9
library(reshape2) #version 1.4.3

lc1 <- readOGR(dsn="./", 
               layer="Kampala_villagesfff")

resp1.zones <- read.csv("./ph1_zone_sample_responsiveness_assigned.csv", stringsAsFactors=FALSE)
resp1.zones$location.id <- tolower(paste(resp1.zones$Division,resp1.zones$Parishes,resp1.zones$VNAME,sep="."))

ex.sample1 <- read.csv("./ph1_zone_sample.csv", stringsAsFactors=FALSE)

ph1 <- read.csv("./ph1_reports_by_reporter.csv", stringsAsFactors=FALSE)
output <- ph1[,c(1:4,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57)] #This can be toggled to get the full information
#Note: Q1 is a language preference, rather than a substantive poll
#See: Phase1_OutgoingPrompts.csv for final times/dates of outgoing prompts

output$responses <- NA
for (i in 1:nrow(output)){
  output$responses[i] <- sum(!is.na(output)[i,6:22])
  output$first4weeks.responses[i] <- sum(!is.na(output)[i,6:10])
  output$last4weeks.responses[i] <- sum(!is.na(output)[i,11:22])
  output$last.response[i] <- max(which(!is.na(output)[i,6:22]))
  output$open.responses[i] <- sum(!is.na(output)[i,c(10,13,16,19,22)])
} #Warnings OK, -Inf on last.response line

output$active.ever <- ifelse(output$last.response>=2,1,0)
output$active.last4weeks <- ifelse(output$last.response>=7,1,0)
output$active.last2weeks <- ifelse(output$last.response>=13,1,0)

#Note: this is to avoid re-indexing the output DV generation above
ph1 <- data.frame(ph1,output[,23:ncol(output)])
output <- ph1

#Earlier file used to pull in parish names to object "output" below
demographics <- read.csv("./ph1_reporters_baseline.csv")
sum(output$full_number==demographics$full_number) #Ordering matches
output$nom <- ifelse(demographics$Nominated=="yes", 1, 0)
#Note: this variable records the treatment assignment recorded by enumeration, not assigned
output$female <- ifelse(demographics$gender=="female", 1, 0)
output$satisfied <- ifelse(demographics$satisfaction_rubbish_collectio=="very_satisfied" | demographics$satisfaction_rubbish_collectio=="satisfied",1,0)
output$years_zone <- demographics$years_zone
output$age <- demographics$age


###Setting up for analysis of Phase 1 data

##Notes:
#1. Data for assignment of reporters by enumerators does not exactly match master list, moving to ITT analysis for failure to treat
#2. no subjects recruited from Ntinda, Village 7 (reason was never documented and project manager/Jacob unsure)

##Zone cleaning
output$Vname <- tolower(output$Vname)
output$vname.clean <- output$Vname
#sort(unique(output$vname.clean))[!(sort(unique(output$vname.clean)) %in% sort(unique(hh$Zone..cleaned.))[-1])] #Lists village names needing cleaning
output$vname.clean <- ifelse(output$vname.clean=="biina zone a ", "biina zone a", output$vname.clean)
output$vname.clean <- ifelse(output$vname.clean=="mutajazib", "mutajazi b", output$vname.clean)
output$vname.clean <- ifelse(output$vname.clean=="ucb ", "ucb", output$vname.clean)
output$vname.clean <- ifelse(output$vname.clean=="ueb ", "ueb", output$vname.clean)
output$vname.clean <- ifelse(output$vname.clean=="village 11 ", "village 11", output$vname.clean)
output$vname.clean <- ifelse(output$vname.clean=="village 12\n", "village 12", output$vname.clean)
output$vname.clean <- ifelse(output$vname.clean=="village 12 ", "village 12", output$vname.clean)
output$vname.clean <- ifelse(output$vname.clean=="wasawa", "waswa", output$vname.clean)
output$vname.clean <- ifelse(output$vname.clean=="zone iv ", "zone iv", output$vname.clean)

demographics$Vname <- tolower(demographics$Vname)
demographics$vname.clean <- demographics$Vname
#sort(unique(demographics$vname.clean))[!(sort(unique(demographics$vname.clean)) %in% sort(unique(hh$Zone..cleaned.))[-1])] #Lists village names needing cleaning
demographics$vname.clean <- ifelse(demographics$vname.clean=="biina zone a ", "biina zone a", demographics$vname.clean)
demographics$vname.clean <- ifelse(demographics$vname.clean=="mutajazib", "mutajazi b", demographics$vname.clean)
demographics$vname.clean <- ifelse(demographics$vname.clean=="ucb ", "ucb", demographics$vname.clean)
demographics$vname.clean <- ifelse(demographics$vname.clean=="ueb ", "ueb", demographics$vname.clean)
demographics$vname.clean <- ifelse(demographics$vname.clean=="village 11 ", "village 11", demographics$vname.clean)
demographics$vname.clean <- ifelse(demographics$vname.clean=="village 12\n", "village 12", demographics$vname.clean)
demographics$vname.clean <- ifelse(demographics$vname.clean=="village 12 ", "village 12", demographics$vname.clean)
demographics$vname.clean <- ifelse(demographics$vname.clean=="wasawa", "waswa", demographics$vname.clean)
demographics$vname.clean <- ifelse(demographics$vname.clean=="zone iv ", "zone iv", demographics$vname.clean)

###Subset of villages with/without contiguous villages
#Note: must create object "spill.dta" from script "SolidWaste_Spillover.R" before running this step
source("SolidWaste_ph1_spillover_direct.R")
spill.dta$Vname[duplicated(spill.dta$Vname)] #Have to use parishes to get unique values

yes.list <- tolower(spill.dta$Vname[spill.dta$con.test])
no.list <- tolower(spill.dta$Vname[!spill.dta$con.test])

l <- output$full_number[duplicated(output$full_number)]

see <- output[output$full_number %in% l,] #There are 4 duplicated numbers in Ph1 sample of reporters
see2 <- demographics[demographics$full_number %in% l,] #Same duplicates in the demographics file

#Removing duplicates from output
output <- output[-c(122,140,479,566,800,814),] #Removing ambiguous duplicates as revealed in object "see" immediately above
demographics <- demographics[-c(122,140,479,566,800,814),]
#l2 <- output$full_number[duplicated(output$full_number)] #Final number of responses reached here: 493 (10/1/18)

demographics$loc.id <- tolower(paste(demographics$vname.clean,demographics$Parish,sep="."))
unique(demographics$loc.id[!(demographics$loc.id %in% spill.dta$loc.id)])

#Cleaning loc.id in demographics to match original LC1 shapefiles (which may have incorrect spellings)
demographics$loc.id <- ifelse(demographics$loc.id=="bungalows iii.bugalobi","bungalows iii.bugolobi",demographics$loc.id)
demographics$loc.id <- ifelse(demographics$loc.id=="nateete central d.nateete","nateete central d.natete",demographics$loc.id)
unique(demographics$loc.id[!(demographics$loc.id %in% spill.dta$loc.id)]) #Confirming full match with randomization file

#Porting loc.id into the output file for full match / subseting
output$loc.id <- demographics$loc.id

#Porting in variables from dta.spill file to output file
for (i in 1:nrow(output)){
  output$zone.id[i] <- subset(spill.dta,loc.id==output$loc.id[i])[1,1]
  output$assignment[i] <- subset(spill.dta,loc.id==output$loc.id[i])[1,4]
  output$con.test[i] <- subset(spill.dta,loc.id==output$loc.id[i])[1,5]
}

#Fixing the zone.id for Zone I that was mistakenly included as a duplicate
output$zone.id <- ifelse(output$zone.id==695 & output$Nominated=="yes", 607, output$zone.id)
output$assignment <- ifelse(output$zone.id==607, "nomination", output$assignment)

table(output$Nominated,output$assignment) #Overall there is high compliance, but not total
#Note: "Nominated" is the direct value entered in the survey by enumerators, not the randomly assigned value for the reporter position
output$treat.problem <- ifelse((output$Nominated=="no" & output$assignment=="nomination") | (output$Nominated=="yes" & output$assignment=="random"), 1, 0)
see <- subset(output, treat.problem==1)
#Note: there is non-compliance with assigned recruitment in approximately 5% of cases, likely due to enumerator entry errors, moving to ITT analysis
```


```{r ph2-data-read}

library(ggplot2) #version 3.2.1
library(stargazer) #version 5.2.2
library(plyr) #version 1.8.4
library(gridExtra) #version 2.3

#Subject responses during Phase 2
ph2 <- read.csv("./ph2_reports_coded.csv", comment.char="#", stringsAsFactors=FALSE)
#Note: this file is updated to have final, compiled usability variable "Data.Usable.mb"

#Reimport the mobile phone numbers lost in manual saving of CSV file
ph2.old <- read.csv("./ph2_reports_coded_with_number.csv", comment.char="#", stringsAsFactors=FALSE) #this file has the full mobile phone numbers, which were overwritten in "ph2" w/ mb usability codes
ph2.old <- ph2.old[order(ph2.old$Subject.ID),]
ph2 <- ph2[order(ph2$Subject.ID),]
sum(ph2$Subject.ID==ph2.old$Subject.ID) #6883, total match
ph2$Mobile.Number <- ph2.old$Mobile.Number

ph1.reporters <- read.csv("./ph1_reporters_baseline_responsiveness_assigned.csv", stringsAsFactors=FALSE)
ph1.reporters$full_number <- paste("256",ph1.reporters$mobile_number,sep="")

ex.sample2 <- read.csv("./ph2_zone_sample.csv", stringsAsFactors=FALSE)
names(ex.sample2)[12] <- "location.id.realized"
ex.sample2$location.id <- tolower(paste(ex.sample2$Division,ex.sample2$Parishes,ex.sample2$VNAME,sep="."))

lc1.compliance <- read.csv("./ph2_lc1_announcement_survey.csv", stringsAsFactors=FALSE)
```

```{r ph2-setup}
library(estimatr) #version 0.18.0

###Merging and checking the drawn to realized sample of zones ####
ph2$location.id <- tolower(paste(ph2$Division.of.Residence,ph2$Parish.of.Residence,ph2$Zone.of.Residence,sep="."))

insample.zones <- c(resp1.zones$location.id,ex.sample2$location.id[ex.sample2$InSample==1])
insample.zone.ids <- c(resp1.zones$X, ex.sample2$X[ex.sample2$InSample==1])
insample.dta <- data.frame(insample.zones,insample.zone.ids)

length(unique(insample.dta$insample.zones))
insample.dta$insample.zones[duplicated(insample.dta$insample.zones)]
see1 <- resp1.zones[resp1.zones$location.id=="nakawa.mbuya ii.zone i",]
#Note: there is a non-unique zone name from the Phase I sample, investigate
#Note: both of the non-unique zones are assigned to the responsiveness condition in Phase 2
#From Jacob (9/16): "We actually sent two teams to this zone on the same day and recruited 24 subjects; 1/2 subjects were recruited using random recruitment and 1/2 subjects using nomination."
#Note: see the post-compile code for the fix to this
#Note: for the purpose of the Phase I analysis, these duplicates are considered separate zones

check <- ph2[!(ph2$location.id %in% insample.dta$insample.zones),]
unique(check$location.id) #Have to check why these zones appear in the data
#makindye.makindye i.kipamba: out-of-sample replacement zone; deviation from sampling protocol
#rubaga.namirembe.mengo town: out-of-sample replacement zone; deviation from sampling protocol
#makindye.katwe i.musoke: out-of-sample replacement zone; deviation from sampling protocol
o.s.r <- c("makindye.makindye i.kipamba","rubaga.namirembe.mengo town","makindye.katwe i.musoke")
ph2$out.sample.replacement <- ifelse(ph2$location.id %in% o.s.r, 1, 0)

#rubaga.kabalagala.kiwafu: appears that district is incorrect in data; should be Makindye Division
ph2$location.id <- ifelse(ph2$location.id=="rubaga.kabalagala.kiwafu","makindye.kabalagala.kiwafu",ph2$location.id)
#kawempe.kikaya.kisota: appears that district is incorrect in data; should be Nakawa Division
ph2$location.id <- ifelse(ph2$location.id=="kawempe.kikaya.kisota","nakawa.kikaya.kisota",ph2$location.id)

#rubaga.nateete.nateete central d: spelling difference; revert to original
ph2$location.id <- ifelse(ph2$location.id=="rubaga.nateete.nateete central d","rubaga.natete.nateete central d",ph2$location.id)
#central.kisenyi ii.kiganda: spelling difference; revert to original
ph2$location.id <- ifelse(ph2$location.id=="central.kisenyi ii.kiganda","central.kiseny ii.kiganda",ph2$location.id)
#central.kisenyi ii.mengo hill: spelling difference; revert to original
ph2$location.id <- ifelse(ph2$location.id=="central.kisenyi ii.mengo hill","central.kiseny ii.mengo hill",ph2$location.id)

length(unique(ph2$location.id)) #Note: not all zones generated reports in Phase 2
insample.dta$insample.zones[!(insample.dta$insample.zones %in% ph2$location.id)]
#"rubaga.lungujja.makamba": Phase 1 recruitment; no reports
#"makindye.bukasa.namuwongo b": Phase 1 recruitment; no reports 
#"makindye.kisugu.mutajazi c": Phase 1 recruitment; no reports   
#"nakawa.ntinda.village 7": Phase 1 recruitment; no reports
#"central.kamwokya i.village a": Phase 1 recruitment; no reports
#"makindye.kansanga-muyenga.pepsi": Phase 2 recruitment; no reports

#"central.mengo.namirembe": Zone replaced in Phase 2 with out-of-sample replacement
#"rubaga.najjanankumbi i.kipamba": Zone replaced in Phase 2 with out-of-sample replacement
#"central.kisenyi iii.musoke": Zone replaced in Phase 2 with out-of-sample replacement
removed.z <- c("central.mengo.namirembe","rubaga.najjanankumbi i.kipamba","central.kisenyi iii.musoke")
removed <- ifelse(insample.dta$insample.zones %in% removed.z, 1, 0)

#Merging and checking the assigned and realized responsiveness treatment
table(ph2$location.id,ph2$Responsiveness) #No cross-over of treatment assignment
ph2.tab <- as.data.frame.matrix(table(ph2$location.id,ph2$Responsiveness))
names(ph2.tab) <- c("X0","X1")
treated.list <- row.names(ph2.tab)[ph2.tab$X1>0]
control.list <- row.names(ph2.tab)[ph2.tab$X0>0]

resp1.zones$location.id[resp1.zones$responsiveness==0] %in% control.list
(resp1.zones$location.id[resp1.zones$responsiveness==0])[c(15,27,38,39)]
#"makindye.bukasa.namuwongo b": Phase 1 recruitment; no reports
#"makindye.kisugu.mutajazi c": Phase 1 recruitment; no reports
#"nakawa.ntinda.village 7": Phase 1 recruitment; no reports        
#"central.kamwokya i.village a": Phase 1 recruitment; no reports
#Finding: no zone-wise non-compliance with treatment assignment

resp1.zones$location.id[resp1.zones$responsiveness==1] %in% treated.list
(resp1.zones$location.id[resp1.zones$responsiveness==1])[14]
#"rubaga.lungujja.makamba": Phase 1 recruitment; no reports
#Finding: no zone-wise non-compliance with treatment assignment

#Looking at the data
table(ph2$Responsiveness) #Impression: large treatment effect of responsiveness

#Adding recruitment phase to ph2 reports file
ph2$phase <- ifelse(ph2$location.id %in% resp1.zones$location.id, "one", "two") #Checked: no missing phase indicators

#Adding other treatment assignments to "ph2" object
p1.neighbor.list <- resp1.zones$location.id[resp1.zones$assignment=="nomination"]
p1.random.list <- resp1.zones$location.id[resp1.zones$assignment=="random"]
p2.lc1.list <- ex.sample2$location.id[ex.sample2$lc1.recruit==1 & ex.sample2$InSample==1]
p2.random.list <- ex.sample2$location.id[ex.sample2$lc1.recruit==0 & ex.sample2$InSample==1]
p2.announce_trt.list <- ex.sample2$location.id[ex.sample2$lc1.announce==1 & ex.sample2$InSample==1]
p2.announce_ctl.list <- ex.sample2$location.id[ex.sample2$lc1.announce==0 & ex.sample2$InSample==1]

ph2$recruitment <- ifelse(ph2$location.id %in% p1.neighbor.list, "neighbor",
                          ifelse(ph2$location.id %in% p2.lc1.list, "lc1",
                                 ifelse(ph2$location.id %in% p1.random.list | ph2$location.id %in% p2.random.list, "random", NA)))
#table(ph2$recruitment, useNA = "always") #To Do: figure out why NA's are in the data
#see <- subset(ph2,is.na(recruitment))
#table(see$location.id) #OK; these are the location.id's for zones were replaced out-of-sample and excluded from the analysis

ph2$lc1.announce <- ifelse(ph2$location.id %in% p2.announce_trt.list, 1,
                    ifelse(ph2$location.id %in% p2.announce_ctl.list, 0, NA))
#table(ph2$lc1.announce, useNA="always")

ph2$lc1.announce.w.ph1 <- ifelse(ph2$phase=="one",0,ph2$lc1.announce)
#table(ph2$lc1.announce.w.ph1, useNA = "always")


###Creating Zone-Wise Dataframe for all reporting in Phase 2 ####
#To Do: include the out-of-sample replacement zones for extended analysis
ph2.zones <- data.frame(insample.zones,removed)
names(ph2.zones)[1] <- "location.id"
ph2.zones$phase <- ifelse(ph2.zones$location.id %in% resp1.zones$location.id, "one", "two")

for (i in 1:nrow(ph2.zones)){

  if (ph2.zones$phase[i]=="one"){
  ph2.zones$recruitment[i] <- resp1.zones[resp1.zones$location.id==ph2.zones$location.id[i],"assignment"]
  ph2.zones$lc1.announce[i] <- 0
  ph2.zones$responsiveness[i] <- resp1.zones[resp1.zones$location.id==ph2.zones$location.id[i],"responsiveness"]
  }
  
  if (ph2.zones$phase[i]=="two"){
    ph2.zones$recruitment[i] <- ifelse((ex.sample2[ex.sample2$location.id==ph2.zones$location.id[i],"lc1.recruit"])==0, "random", "lc1.recruit")
    ph2.zones$lc1.announce[i] <- ex.sample2[ex.sample2$location.id==ph2.zones$location.id[i],"lc1.announce"]
    ph2.zones$responsiveness[i] <- ex.sample2[ex.sample2$location.id==ph2.zones$location.id[i],"Responsiveness"]
  }
} #Warnings fixed at end of block, due to duplicate nakawa.mbuya ii.zone i in Phase 1 sample
ph2.zones$recruitment <- factor(ph2.zones$recruitment, levels=c("random","nomination","lc1.recruit"))

for (i in 1:nrow(ph2.zones)){
  ph2.zones$total.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Data.Usable.mb=="Yes"))
  ph2.zones$last2weeks.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number>=13 & Data.Usable.mb=="Yes"))
  
  sub <- subset(ph2, location.id==ph2.zones$location.id[i] & Data.Usable.mb=="Yes")
  ph2.zones$active.reporters[i] <- nrow(sub[!duplicated(sub$Subject.ID),])
  
  ph2.zones$q1.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==1 & Data.Usable.mb=="Yes"))
  ph2.zones$q2.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==2 & Data.Usable.mb=="Yes"))
  ph2.zones$q3.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==3 & Data.Usable.mb=="Yes"))
  ph2.zones$q4.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==4 & Data.Usable.mb=="Yes"))
  ph2.zones$q5.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==5 & Data.Usable.mb=="Yes"))
  ph2.zones$q6.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==6 & Data.Usable.mb=="Yes"))
  ph2.zones$q7.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==7 & Data.Usable.mb=="Yes"))
  ph2.zones$q8.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==8 & Data.Usable.mb=="Yes"))
  ph2.zones$q9.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==9 & Data.Usable.mb=="Yes"))
  ph2.zones$q10.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==10 & Data.Usable.mb=="Yes"))
  ph2.zones$q11.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==11 & Data.Usable.mb=="Yes"))
  ph2.zones$q12.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==12 & Data.Usable.mb=="Yes"))
  ph2.zones$q13.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==13 & Data.Usable.mb=="Yes"))
  ph2.zones$q14.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==14 & Data.Usable.mb=="Yes"))
  ph2.zones$q15.responses[i] <- nrow(subset(ph2, location.id==ph2.zones$location.id[i] & Question.Number==15 & Data.Usable.mb=="Yes"))
}

##Adding reporters per zone
p2.reporters <- read.csv("./ph2_reporters_baseline.csv", stringsAsFactors=FALSE) #uncleaned list

ph2.reporters <- read.csv("./ph1ph2_reporters_list_assigned.csv", stringsAsFactors=FALSE)[,1:11]
ph2.reporters$location.id <- tolower(paste(ph2.reporters$Division.of.Residence,ph2.reporters$Parish.of.Residence,ph2.reporters$Zone.of.Residence, sep="."))
r.zones <- sort(unique(ph2.reporters$location.id))
r.zones[!(r.zones %in% ph2.zones$location.id)] #location.ids without a match in the zone-wise analysis file

#central.kisenyi ii.kiganda: spelling difference; revert to original
ph2.reporters$location.id <- ifelse(ph2.reporters$location.id=="central.kisenyi ii.kiganda","central.kiseny ii.kiganda",ph2.reporters$location.id)

#central.kisenyi ii.mengo hill: spelling difference; revert to original
ph2.reporters$location.id <- ifelse(ph2.reporters$location.id=="central.kisenyi ii.mengo hill","central.kiseny ii.mengo hill",ph2.reporters$location.id)

#kawempe.kikaya.kisota: appears that district is incorrect in data; should be Nakawa Division
ph2.reporters$location.id <- ifelse(ph2.reporters$location.id=="kawempe.kikaya.kisota","nakawa.kikaya.kisota",ph2.reporters$location.id)

#rubaga.kabalagala.kiwafu: appears that district is incorrect in data; should be Makindye Division
ph2.reporters$location.id <- ifelse(ph2.reporters$location.id=="rubaga.kabalagala.kiwafu","makindye.kabalagala.kiwafu",ph2.reporters$location.id)

#rubaga.nateete.nateete central d: spelling difference; revert to original
ph2.reporters$location.id <- ifelse(ph2.reporters$location.id=="rubaga.nateete.nateete central d","rubaga.natete.nateete central d",ph2.reporters$location.id)

#"rubaga.mbuya ii.zone 7": District entered incorrectly in reporter file
ph2.reporters$location.id <- ifelse(ph2.reporters$location.id=="rubaga.mbuya ii.zone 7","nakawa.mbuya ii.zone 7",ph2.reporters$location.id)

#makindye.makindye i.kipamba: out-of-sample replacement zone; deviation from sampling protocol
#rubaga.namirembe.mengo town: out-of-sample replacement zone; deviation from sampling protocol
#makindye.katwe i.musoke: out-of-sample replacement zone; deviation from sampling protocol

for (i in 1:nrow(ph2.zones)){
  ph2.zones$total.reporters[i] <- nrow(subset(ph2.reporters, location.id==ph2.zones$location.id[i]))
}

check <- subset(ph2.zones, total.reporters==0)
#Only nakawa.ntinda.village 7 has no reporters, but was not removed from the sample

ph2.zones <- subset(ph2.zones, location.id!="nakawa.ntinda.village 7")

tofix <- subset(ph2.zones, location.id=="nakawa.mbuya ii.zone i")

#Fixing dual method of recruitment in nakawa.mbuya ii.zone i
ph2.zones$recruitment[ph2.zones$location.id=="nakawa.mbuya ii.zone i"] <- c("random","nomination")

ph2.reporters.zonei <- subset(ph2.reporters, location.id=="nakawa.mbuya ii.zone i")
for (i in 1:nrow(ph2.reporters.zonei)){
  ph2.reporters.zonei$Recruitment.Treatment[i] <- ph1.reporters$Nominated[ph1.reporters$mobile_number==ph2.reporters.zonei$Mobile.Number[i]]
}
ph2.reporters.zonei$Recruitment.Treatment <- ifelse(ph2.reporters.zonei$Recruitment.Treatment=="no", "random", "nomination")

ph2.zones$total.reporters[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2.reporters.zonei, Recruitment.Treatment=="random"))
ph2.zones$total.reporters[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2.reporters.zonei, Recruitment.Treatment=="nomination"))

zi.ran <- paste("256",ph2.reporters.zonei$Mobile.Number[ph2.reporters.zonei$Recruitment.Treatment=="random"],sep="")
zi.nom <- paste("256",ph2.reporters.zonei$Mobile.Number[ph2.reporters.zonei$Recruitment.Treatment=="nomination"],sep="")

ph2.zones$total.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Data.Usable.mb=="Yes"))
ph2.zones$total.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Data.Usable.mb=="Yes"))

ph2.zones$last2weeks.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number>=13 & Data.Usable.mb=="Yes"))
ph2.zones$last2weeks.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number>=13 & Data.Usable.mb=="Yes"))

sub.ran <- subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Data.Usable.mb=="Yes")
sub.nom <- subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Data.Usable.mb=="Yes")

ph2.zones$active.reporters[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(sub.ran[!duplicated(sub.ran$Subject.ID),])
ph2.zones$active.reporters[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(sub.nom[!duplicated(sub.nom$Subject.ID),])

ph2.zones$q1.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==1 & Data.Usable.mb=="Yes"))
ph2.zones$q1.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==1 & Data.Usable.mb=="Yes"))

ph2.zones$q2.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==2 & Data.Usable.mb=="Yes"))
ph2.zones$q2.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==2 & Data.Usable.mb=="Yes"))

ph2.zones$q3.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==3 & Data.Usable.mb=="Yes"))
ph2.zones$q3.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==3 & Data.Usable.mb=="Yes"))

ph2.zones$q4.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==4 & Data.Usable.mb=="Yes"))
ph2.zones$q4.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==4 & Data.Usable.mb=="Yes"))

ph2.zones$q5.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==5 & Data.Usable.mb=="Yes"))
ph2.zones$q5.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==5 & Data.Usable.mb=="Yes"))

ph2.zones$q6.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==6 & Data.Usable.mb=="Yes"))
ph2.zones$q6.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==6 & Data.Usable.mb=="Yes"))

ph2.zones$q7.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==7 & Data.Usable.mb=="Yes"))
ph2.zones$q7.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==7 & Data.Usable.mb=="Yes"))

ph2.zones$q8.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==8 & Data.Usable.mb=="Yes"))
ph2.zones$q8.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==8 & Data.Usable.mb=="Yes"))

ph2.zones$q9.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==9 & Data.Usable.mb=="Yes"))
ph2.zones$q9.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==9 & Data.Usable.mb=="Yes"))

ph2.zones$q10.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==10 & Data.Usable.mb=="Yes"))
ph2.zones$q10.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==10 & Data.Usable.mb=="Yes"))

ph2.zones$q11.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==11 & Data.Usable.mb=="Yes"))
ph2.zones$q11.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==11 & Data.Usable.mb=="Yes"))

ph2.zones$q12.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==12 & Data.Usable.mb=="Yes"))
ph2.zones$q12.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==12 & Data.Usable.mb=="Yes"))

ph2.zones$q13.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==13 & Data.Usable.mb=="Yes"))
ph2.zones$q13.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==13 & Data.Usable.mb=="Yes"))

ph2.zones$q14.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==14 & Data.Usable.mb=="Yes"))
ph2.zones$q14.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==14 & Data.Usable.mb=="Yes"))

ph2.zones$q15.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="random"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Question.Number==15 & Data.Usable.mb=="Yes"))
ph2.zones$q15.responses[ph2.zones$location.id=="nakawa.mbuya ii.zone i" & ph2.zones$recruitment=="nomination"] <- nrow(subset(ph2, location.id=="nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Question.Number==15 & Data.Usable.mb=="Yes"))


###Finishing Reporter-Wise Dataframe for all reporting in Phase 2 ####

ph2.reporters$full_number <- paste("256", ph2.reporters$Mobile.Number,sep="")
out.of.protocol.list <- unique(ph2.reporters[!(ph2.reporters$location.id %in% ex.sample2$location.id) & ph2.reporters$Recruitment.Phase==2, "location.id"])
ph2.reporters <- ph2.reporters[!(ph2.reporters$location.id %in% out.of.protocol.list),]

for (i in 1:nrow(ph2.reporters)){
  
  if (ph2.reporters$Recruitment.Phase[i]==1 & ph2.reporters$location.id[i]!="nakawa.mbuya ii.zone i"){
    ph2.reporters$recruitment[i] <- resp1.zones[resp1.zones$location.id==ph2.reporters$location.id[i],"assignment"]
    ph2.reporters$lc1.announce[i] <- 0
  }
  
  if (ph2.reporters$Recruitment.Phase[i]==1 & ph2.reporters$location.id[i]=="nakawa.mbuya ii.zone i"){
    ph2.reporters$recruitment[i] <- ifelse(ph1.reporters$Nominated[ph1.reporters$mobile_number==ph2.reporters$Mobile.Number[i]]=="no","random","nomination")
    ph2.reporters$lc1.announce[i] <- 0
  }
  
  if (ph2.reporters$Recruitment.Phase[i]==2){
    ph2.reporters$recruitment[i] <- ifelse((ex.sample2[ex.sample2$location.id==ph2.reporters$location.id[i],"lc1.recruit"])==0, "random", "lc1.recruit")
    ph2.reporters$lc1.announce[i] <- ex.sample2[ex.sample2$location.id==ph2.reporters$location.id[i],"lc1.announce"]
  }
}

ph2.reporters$recruitment <- factor(ph2.reporters$recruitment, levels=c("random","nomination","lc1.recruit"))

for (i in 1:nrow(ph2.reporters)){
  ph2.reporters$total.responses[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Data.Usable.mb=="Yes"))
  ph2.reporters$last2week.responses[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number>=13 & Data.Usable.mb=="Yes"))
}
ph2.reporters$active.ever <- ifelse(ph2.reporters$total.responses>0,1,0)

table(ph2.reporters$Responsiveness, ph2.reporters$active.ever)
table(ph2.reporters$Responsiveness, ph2.reporters$total.responses)

###Adding spillover variable from spill.dta2
spill.dta2 <- read.csv("./ph2_responsiveness_contiguous.csv")
#Note: both "nakawa.mbuya ii.zone i" have contiguous zone, no need to distinguish in loop
for (i in 1:nrow(ph2.reporters)){
  ph2.reporters$con.test[i] <- subset(spill.dta2, location.id==ph2.reporters$location.id[i])[1,"con.test"]
}

###Adding probability of Responsiveness exposure
resp.exposure.prob <- read.csv("./ph2_responsiveness_spillover_probability.csv")
resp.exposure.prob$location.id <- as.character(resp.exposure.prob$location.id)
resp.exposure.prob$location.id <- ifelse(resp.exposure.prob$location.id=="nakawa.kyanja.kulambiro-kondogolo","nakawa.kyanja.kulambiro",resp.exposure.prob$location.id) #Naming inconsistency that does not indicate non-compliance with recruitment protocol
#unique(ph2.reporters$location.id)[!(unique(ph2.reporters$location.id) %in% resp.exposure.prob$location.id)]

for (i in 1:nrow(ph2.reporters)){
  ph2.reporters$d0i0.prob[i] <- subset(resp.exposure.prob, location.id==ph2.reporters$location.id[i])[1,"d0i0"]/100000
  ph2.reporters$d1i0.prob[i] <- subset(resp.exposure.prob, location.id==ph2.reporters$location.id[i])[1,"d1i0"]/100000
  ph2.reporters$d0i1.prob[i] <- subset(resp.exposure.prob, location.id==ph2.reporters$location.id[i])[1,"d0i1"]/100000
  ph2.reporters$d1i1.prob[i] <- subset(resp.exposure.prob, location.id==ph2.reporters$location.id[i])[1,"d1i1"]/100000
  ph2.reporters$direct[i] <- subset(resp.exposure.prob, location.id==ph2.reporters$location.id[i])[1,"direct"]
  ph2.reporters$indirect[i] <- subset(resp.exposure.prob, location.id==ph2.reporters$location.id[i])[1,"indirect"]
}

for (i in 1:nrow(ph2.reporters)){
  
  if (ph2.reporters$location.id[i]=="nakawa.mbuya ii.zone i" & ph2.reporters$recruitment[i]=="random"){
  ph2.reporters$d0i0.prob[i] <- subset(resp.exposure.prob, zone.id==695)[1,"d0i0"]/100000
  ph2.reporters$d1i0.prob[i] <- subset(resp.exposure.prob, zone.id==695)[1,"d1i0"]/100000
  ph2.reporters$d0i1.prob[i] <- subset(resp.exposure.prob, zone.id==695)[1,"d0i1"]/100000
  ph2.reporters$d1i1.prob[i] <- subset(resp.exposure.prob, zone.id==695)[1,"d1i1"]/100000
  ph2.reporters$direct[i] <- subset(resp.exposure.prob, location.id==ph2.reporters$location.id[i])[1,"direct"]
  ph2.reporters$indirect[i] <- subset(resp.exposure.prob, location.id==ph2.reporters$location.id[i])[1,"indirect"]
  }
  
  if (ph2.reporters$location.id[i]=="nakawa.mbuya ii.zone i" & ph2.reporters$recruitment[i]=="nomination"){
    ph2.reporters$d0i0.prob[i] <- subset(resp.exposure.prob, zone.id==607)[1,"d0i0"]/100000
    ph2.reporters$d1i0.prob[i] <- subset(resp.exposure.prob, zone.id==607)[1,"d1i0"]/100000
    ph2.reporters$d0i1.prob[i] <- subset(resp.exposure.prob, zone.id==607)[1,"d0i1"]/100000
    ph2.reporters$d1i1.prob[i] <- subset(resp.exposure.prob, zone.id==607)[1,"d1i1"]/100000
    ph2.reporters$direct[i] <- subset(resp.exposure.prob, location.id==ph2.reporters$location.id[i])[1,"direct"]
    ph2.reporters$indirect[i] <- subset(resp.exposure.prob, location.id==ph2.reporters$location.id[i])[1,"indirect"]
  }
} #Fixing probabilities for the duplicated "nakawa.mbuya ii.zone i" zones

ph2.reporters$indirect.prob <- ph2.reporters$d0i1.prob + ph2.reporters$d1i1.prob

#Checking assignment between files
table(ph2.reporters$Responsiveness, ph2.reporters$direct)
see <- subset(ph2.reporters, Responsiveness==1 & direct==0)
#All discrepancies in responsiveness treatment are for Kakato II (treatment error), see email "central.bukesa.kakato ii" on treatment error

ph2.reporters$exposure <- paste("d",ph2.reporters$direct,"i",ph2.reporters$indirect, sep="")

ph2.reporters$prob.weight[ph2.reporters$direct==1 & ph2.reporters$indirect==1] <- ph2.reporters$d1i1.prob[ph2.reporters$direct==1 & ph2.reporters$indirect==1]
ph2.reporters$prob.weight[ph2.reporters$direct==1 & ph2.reporters$indirect==0] <- ph2.reporters$d1i0.prob[ph2.reporters$direct==1 & ph2.reporters$indirect==0]
ph2.reporters$prob.weight[ph2.reporters$direct==0 & ph2.reporters$indirect==1] <- ph2.reporters$d0i1.prob[ph2.reporters$direct==0 & ph2.reporters$indirect==1]
ph2.reporters$prob.weight[ph2.reporters$direct==0 & ph2.reporters$indirect==0] <- ph2.reporters$d0i0.prob[ph2.reporters$direct==0 & ph2.reporters$indirect==0]


###Adding responses to individual questions
for (i in 1:nrow(ph2.reporters)){
  ph2.reporters$q1.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==1 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q2.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==2 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q3.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==3 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q4.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==4 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q5.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==5 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q6.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==6 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q7.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==7 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q8.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==8 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q9.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==9 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q10.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==10 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q11.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==11 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q12.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==12 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q13.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==13 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q14.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==14 & Data.Usable.mb=="Yes")) > 0
  ph2.reporters$q15.response[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==15 & Data.Usable.mb=="Yes")) > 0
}

for (i in 1:nrow(ph2.reporters)){
  ph2.reporters$q1.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==1 & Data.Usable.mb=="Yes"))
  ph2.reporters$q2.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==2 & Data.Usable.mb=="Yes"))
  ph2.reporters$q3.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==3 & Data.Usable.mb=="Yes"))
  ph2.reporters$q4.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==4 & Data.Usable.mb=="Yes"))
  ph2.reporters$q5.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==5 & Data.Usable.mb=="Yes"))
  ph2.reporters$q6.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==6 & Data.Usable.mb=="Yes"))
  ph2.reporters$q7.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==7 & Data.Usable.mb=="Yes"))
  ph2.reporters$q8.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==8 & Data.Usable.mb=="Yes"))
  ph2.reporters$q9.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==9 & Data.Usable.mb=="Yes"))
  ph2.reporters$q10.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==10 & Data.Usable.mb=="Yes"))
  ph2.reporters$q11.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==11 & Data.Usable.mb=="Yes"))
  ph2.reporters$q12.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==12 & Data.Usable.mb=="Yes"))
  ph2.reporters$q13.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==13 & Data.Usable.mb=="Yes"))
  ph2.reporters$q14.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==14 & Data.Usable.mb=="Yes"))
  ph2.reporters$q15.responses.cleaned[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==15 & Data.Usable.mb=="Yes"))
}

for (i in 1:nrow(ph2.reporters)){
  ph2.reporters$q1.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==1))
  ph2.reporters$q2.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==2))
  ph2.reporters$q3.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==3))
  ph2.reporters$q4.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==4))
  ph2.reporters$q5.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==5))
  ph2.reporters$q6.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==6))
  ph2.reporters$q7.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==7))
  ph2.reporters$q8.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==8))
  ph2.reporters$q9.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==9))
  ph2.reporters$q10.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==10))
  ph2.reporters$q11.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==11))
  ph2.reporters$q12.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==12))
  ph2.reporters$q13.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==13))
  ph2.reporters$q14.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==14))
  ph2.reporters$q15.responses.all[i] <- nrow(subset(ph2, Subject.ID==ph2.reporters$Subject.ID[i] & Question.Number==15))
}

#Indicator Variable for Recruitment Phase
ph2.reporters$Recruited.Phase2 <- ifelse(ph2.reporters$Recruitment.Phase==2,1,0)
```

## Figure 2: Map of Treatment Assignments

```{r fig2}
layout(matrix(1:3, ncol = 3))
par(mar = c(1, 2.1, 2, 2.1), cex=0.85)

random.recruit <- resp1.zones$X[resp1.zones$assignment=="random"]
nominate.recruit <- resp1.zones$X[resp1.zones$assignment=="nomination"]

plot(lc1, col = "lightgrey")
title("Phase 1 Recruitment", line=-1)
plot(lc1[row.names(lc1@data) %in% random.recruit, ], col = "blue", add = TRUE) #Adding randomly assigned zones to city map
plot(lc1[row.names(lc1@data) %in% nominate.recruit, ], col = "red", add = TRUE) #Adding randomly assigned zones to city map
legend("bottomleft", legend=c("Random","Nominated"), cex=1, 
       fill=c("blue","red"), bty = "n") 

ex.sample2 <- subset(ex.sample2, InSample==1)
random.recruit <- ex.sample2$X[ex.sample2$lc1.recruit==0 & ex.sample2$lc1.announce==0]
random.recruit.lc1a <- ex.sample2$X[ex.sample2$lc1.recruit==0 & ex.sample2$lc1.announce==1]
lc1.recruit <- ex.sample2$X[ex.sample2$lc1.recruit==1 & ex.sample2$lc1.announce==0]
lc1.recruit.lc1a <- ex.sample2$X[ex.sample2$lc1.recruit==1 & ex.sample2$lc1.announce==1]

plot(lc1, col = "lightgrey") #Plotting the shapefiles that make up the city map
title("Phase 2 Recruitment", line=-1)
plot(lc1[row.names(lc1@data) %in% random.recruit, ], col = "cornflowerblue", add = TRUE)
plot(lc1[row.names(lc1@data) %in% random.recruit.lc1a, ], col = "blue3", add = TRUE)
plot(lc1[row.names(lc1@data) %in% lc1.recruit, ], col = "coral", add = TRUE)
plot(lc1[row.names(lc1@data) %in% lc1.recruit.lc1a, ], col = "coral4", add = TRUE)
legend("bottomleft", legend=c("Random","Random/Announcement","LC1 Nomination","LC1 Nomination/Announcement"), cex=1, 
       fill=c("cornflowerblue","blue3","coral","coral4"), bty = "n")

p1.resp1 <- resp1.zones$X[resp1.zones$responsiveness==1]
p1.resp0 <- resp1.zones$X[resp1.zones$responsiveness==0]
p2.resp1 <- ex.sample2$X[ex.sample2$Responsiveness==1]
p2.resp0 <- ex.sample2$X[ex.sample2$Responsiveness==0]

resp1 <- c(p1.resp1, p2.resp1) #All zones in responsiveness treatment
resp0 <-c(p1.resp0, p2.resp0) #All zones in control

plot(lc1, col = "lightgrey") #Plotting the shapefiles that make up the city map
title("Government Responsiveness", line=-1)
plot(lc1[row.names(lc1@data) %in% resp0, ], col = "blue", add = TRUE) #Adding randomly assigned zones to city map
plot(lc1[row.names(lc1@data) %in% resp1, ], col = "red", add = TRUE) #Adding randomly assigned zones to city map
legend("bottomleft", legend=c("Control","Responsiveness"), cex=1, 
       fill=c("blue","red"), bty = "n") 
```

## Table 1: Descriptive statistics of reporters recruited in both Phases

```{r tab1-descriptives}

##############Descriptive data about Phase 1 reporters

se.boot.cont <- function(vector,sims=10000){
  store <- rep(NA,sims)
  set.seed(201)
  for (i in 1:sims){
    store[i] <- mean(sample(vector, replace=T), na.rm=T)
  }
  return(sd(store))
}

mean(output$years_zone[output$assignment=="random"])
mean(output$years_zone[output$assignment=="nomination"])

se.boot.cont(output$years_zone[output$assignment=="random"])
se.boot.cont(output$years_zone[output$assignment=="nomination"])

t.test(output$years_zone[output$assignment=="random"],output$years_zone[output$assignment=="nomination"])

mean(output$female[output$assignment=="random"])
mean(output$female[output$assignment=="nomination"])

se.boot.cont(output$female[output$assignment=="random"])
se.boot.cont(output$female[output$assignment=="nomination"])

t.test(output$female[output$assignment=="random"],output$female[output$assignment=="nomination"])

mean(output$age[output$assignment=="random"])
mean(output$age[output$assignment=="nomination"])

se.boot.cont(output$age[output$assignment=="random"])
se.boot.cont(output$age[output$assignment=="nomination"])

t.test(output$age[output$assignment=="random"],output$age[output$assignment=="nomination"])

mean(output$satisfied[output$assignment=="random"])
mean(output$satisfied[output$assignment=="nomination"], na.rm=T)

se.boot.cont(output$satisfied[output$assignment=="random"])
se.boot.cont(output$satisfied[output$assignment=="nomination"])

t.test(output$satisfied[output$assignment=="random"],output$satisfied[output$assignment=="nomination"])

table(output$assignment)

##############Descriptive data about Phase 2 reporters

for (i in 1:nrow(ph2.reporters)){
  if(ph2.reporters$Recruitment.Phase[i]==2){
  sub <- subset(p2.reporters, mobile_number==ph2.reporters$Mobile.Number[i])
  ph2.reporters$years_zone[i] <- sub[1,9]
  ph2.reporters$female[i] <- ifelse(sub[1,10]=="female", 1, 0)
  ph2.reporters$age[i] <- sub[1,11]
  ph2.reporters$satisfied[i] <- ifelse(sub[1,28]=="very_satisfied" | sub[1,28]=="satisfied",1,0)
  ph2.reporters$satisfied[i] <- ifelse(sub[1,28]=="refused_to_ans",NA,ph2.reporters$satisfied[i])
  ph2.reporters$Nominated[i] <- ifelse(ph2.reporters$recruitment[i]=="lc1.recruit",1,0)
  }
  
  if(ph2.reporters$Recruitment.Phase[i]==1){
    ph2.reporters$years_zone[i] <- NA
    ph2.reporters$female[i] <- NA
    ph2.reporters$age[i] <- NA
    ph2.reporters$satisfied[i] <- NA
    ph2.reporters$Nominated[i] <- NA
  }
}

ph2.p2.reporters <- subset(ph2.reporters, Recruitment.Phase==2)

mean(ph2.p2.reporters$years_zone[ph2.p2.reporters$Nominated==0])
mean(ph2.p2.reporters$years_zone[ph2.p2.reporters$Nominated==1])

se.boot.cont(ph2.p2.reporters$years_zone[ph2.p2.reporters$Nominated==0])
se.boot.cont(ph2.p2.reporters$years_zone[ph2.p2.reporters$Nominated==1])

t.test(ph2.p2.reporters$years_zone[ph2.p2.reporters$Nominated==0],ph2.p2.reporters$years_zone[ph2.p2.reporters$Nominated==1])

mean(ph2.p2.reporters$female[ph2.p2.reporters$Nominated==0])
mean(ph2.p2.reporters$female[ph2.p2.reporters$Nominated==1])

se.boot.cont(ph2.p2.reporters$female[ph2.p2.reporters$Nominated==0])
se.boot.cont(ph2.p2.reporters$female[ph2.p2.reporters$Nominated==1])

t.test(ph2.p2.reporters$female[ph2.p2.reporters$Nominated==0],ph2.p2.reporters$female[ph2.p2.reporters$Nominated==1])

mean(ph2.p2.reporters$age[ph2.p2.reporters$Nominated==0])
mean(ph2.p2.reporters$age[ph2.p2.reporters$Nominated==1])

se.boot.cont(ph2.p2.reporters$age[ph2.p2.reporters$Nominated==0])
se.boot.cont(ph2.p2.reporters$age[ph2.p2.reporters$Nominated==1])

t.test(ph2.p2.reporters$age[ph2.p2.reporters$Nominated==0],ph2.p2.reporters$age[ph2.p2.reporters$Nominated==1])

mean(ph2.p2.reporters$satisfied[ph2.p2.reporters$Nominated==0])
mean(ph2.p2.reporters$satisfied[ph2.p2.reporters$Nominated==1], na.rm=T)

se.boot.cont(ph2.p2.reporters$satisfied[ph2.p2.reporters$Nominated==0])
se.boot.cont(ph2.p2.reporters$satisfied[ph2.p2.reporters$Nominated==1])

t.test(ph2.p2.reporters$satisfied[ph2.p2.reporters$Nominated==0],ph2.p2.reporters$satisfied[ph2.p2.reporters$Nominated==1])

table(ph2.p2.reporters$Nominated, useNA = "always")
```


## Figure 3: Reporting by recruitment condition during Phase 1

```{r fig3}

##Analysis
vlist <- unique(output$zone.id)
treat <- c(rep(0,45),rep(1,45))
reps <- 10000
store.ever <- rep(NA, reps)
store.total <- rep(NA, reps)
store.open <- rep(NA, reps)
store.first4 <- rep(NA, reps)
store.last4 <- rep(NA, reps)
set.seed(201)
for (i in 1:reps){
  treat.ran <- sample(treat)
  
  for (j in 1:length(vlist)){
    output$treat[output$zone.id==vlist[j]] <- treat.ran[j]
  }
  
  store.ever[i] <- mean(output$active.ever[output$treat==1]) - mean(output$active.ever[output$treat==0])
  store.total[i] <- mean(output$responses[output$treat==1]) - mean(output$responses[output$treat==0])
  store.open[i] <- mean(output$open.responses[output$treat==1]) - mean(output$open.responses[output$treat==0])
  store.first4[i] <- mean(output$first4weeks.responses[output$treat==1]) - mean(output$first4weeks.responses[output$treat==0])
  store.last4[i] <- mean(output$last4weeks.responses[output$treat==1]) - mean(output$last4weeks.responses[output$treat==0])
}

ob.ever <- mean(output$active.ever[output$assignment=="nomination"]) - mean(output$active.ever[output$assignment=="random"])
ob.ever
sum(ob.ever < store.ever) / reps #This is the p-value

ob.total <- mean(output$responses[output$assignment=="nomination"]) - mean(output$responses[output$assignment=="random"])
ob.total
sum(ob.total < store.total) / reps #This is the p-value

ob.open <- mean(output$open.responses[output$assignment=="nomination"]) - mean(output$open.responses[output$assignment=="random"])
ob.open
sum(ob.open < store.open) / reps #This is the p-value

ob.first4 <- mean(output$first4weeks.responses[output$assignment=="nomination"]) - mean(output$first4weeks.responses[output$assignment=="random"])
sum(ob.first4 < store.first4) / reps #This is the p-value

ob.last4 <- mean(output$last4weeks.responses[output$assignment=="nomination"]) - mean(output$last4weeks.responses[output$assignment=="random"])
sum(ob.last4 < store.last4) / reps #This is the p-value

##Figure
library(ggplot2) #version 3.2.1
library(reshape2) #version 1.4.3
source("multiplot.R")

#####Combined figure for pre-registered hypothesis tests (v2 paper)
a<-data.frame(prop.table(table(output$assignment,output$active.ever),1))[3:4,]
names(a) <- c("assignment","active.ever","prop")
a<-within(a, assignment <- factor(assignment, levels=c("nomination","random"), labels=c("nomination","street")))
table(output$assignment,output$active.ever)
ran.means <- rep(NA,1000)
nom.means <- rep(NA,1000)
table(output$assignment,output$active.ever) #Used to get SE bars
set.seed(201)
for (i in 1:1000){
  ran.means[i] <- mean(sample(c(rep(0,447),rep(1,70)), replace=T))
  nom.means[i] <- mean(sample(c(rep(0,435),rep(1,82)), replace=T))
} #table(output$assignment,output$active.ever) for these numbers
a$se <- c(sd(ran.means),sd(nom.means))
se.bars <- aes(ymax = a$prop + a$se, ymin = a$prop - a$se)
ever <- ggplot(data=a, aes(x=assignment, y=prop)) + geom_bar(stat="identity") + geom_errorbar(se.bars, width=0.3) + ylab("Proportion Ever Responding") + ggtitle("(A) Ever Responding") + theme(plot.title = element_text(lineheight=1, face="bold"))


responses <- c(sum(output$responses[output$assignment=="nomination"]),
               sum(output$responses[output$assignment=="random"]))
reporters <- as.numeric(table(output$assignment))
b <- data.frame(responses,reporters)
row.names(b) <- c("nomination","street")
b$avg.responses <- b$responses/b$reporters
b$assignment <- row.names(b)
ran.means <- rep(NA,1000)
nom.means <- rep(NA,1000)
set.seed(201)
for (i in 1:1000){
  ran.means[i] <- mean(sample(output$responses[output$assignment=="random"], replace=T))
  nom.means[i] <- mean(sample(output$responses[output$assignment=="nomination"], replace=T))
}
b$se <- c(sd(ran.means),sd(nom.means))
se.bars.b <- aes(ymax = b$avg.responses + b$se, ymin = b$avg.responses - b$se)
avg <- ggplot(data=b, aes(x=assignment, y=avg.responses)) + geom_bar(stat="identity") + geom_errorbar(se.bars.b, width=0.3) + ylab("Total Responses Per Reporter") + ggtitle("(B) Average Responses") + theme(plot.title = element_text(lineheight=1, face="bold"))


open.responses <- c(sum(output$open.responses[output$assignment=="nomination"]),
               sum(output$open.responses[output$assignment=="random"]))
reporters <- as.numeric(table(output$assignment))
c <- data.frame(open.responses,reporters)
row.names(c) <- c("nomination","street")
c$avg.open.responses <- c$open.responses/c$reporters
c$assignment <- row.names(c)

ran.means <- rep(NA,1000)
nom.means <- rep(NA,1000)
set.seed(201)
for (i in 1:1000){
  ran.means[i] <- mean(sample(output$open.responses[output$assignment=="random"], replace=T))
  nom.means[i] <- mean(sample(output$open.responses[output$assignment=="nomination"], replace=T))
}
c$se <- c(sd(ran.means),sd(nom.means))
se.bars.c <- aes(ymax = c$avg.open.responses + c$se, ymin = c$avg.open.responses - c$se)

open <- ggplot(data=c, aes(x=assignment, y=avg.open.responses)) + geom_bar(stat="identity") + geom_errorbar(se.bars.c, width=0.3) + ylab("Open Responses Per Reporter") + ggtitle("(C) Open Responses") + theme(plot.title = element_text(lineheight=1, face="bold"))

#pdf("Provision_CombinedPhase1Results_170306.pdf",width=7.5,height=4)
#pdf("Provision_CombinedPhase1Results_180918.pdf",width=8,height=4)
#pdf("Provision_CombinedPhase1Results_181009.pdf",width=8,height=4)
multiplot(ever,avg,open,cols=3)
#dev.off()
```

## Table 2: Total number of active reporters during Phase 2

```{r tab-2}
### Total number of active reporters during Phase 2 ####
###Active Reporters, cluster-robust SE
mod.pooled.clst <- lm_robust(active.ever ~ Responsiveness + recruitment + lc1.announce + Recruited.Phase2, data=ph2.reporters,
                        clusters = location.id)
mod.pooled.ci <- cbind(mod.pooled.clst$conf.low, mod.pooled.clst$conf.high)

mod.one.clst <- lm_robust(active.ever ~ Responsiveness + recruitment, data=ph2.reporters[ph2.reporters$Recruited.Phase2==0,],
                     clusters = location.id)
mod.one.ci <- cbind(mod.one.clst$conf.low, mod.one.clst$conf.high)

mod.two.clst <- lm_robust(active.ever ~ Responsiveness + recruitment + lc1.announce, data=ph2.reporters[ph2.reporters$Recruited.Phase2==1,],
                     clusters = location.id)
mod.two.ci <- cbind(mod.two.clst$conf.low, mod.two.clst$conf.high)


###Active Reporters, naive for model objects
mod.pooled <- lm(active.ever ~ Responsiveness + recruitment + lc1.announce + Recruited.Phase2, data=ph2.reporters)

mod.one <- lm(active.ever ~ Responsiveness + recruitment, data=ph2.reporters[ph2.reporters$Recruited.Phase2==0,])

mod.two <- lm(active.ever ~ Responsiveness + recruitment + lc1.announce, data=ph2.reporters[ph2.reporters$Recruited.Phase2==1,])

#Table
stargazer(mod.pooled, mod.one, mod.two, type = "latex", 
          dep.var.caption  = "DV: At Least One Report During Phase 2",
          dep.var.labels.include = FALSE,
          covariate.labels = c("Responsiveness","Neighbor Nomination","LC1 Nomination","LC1 Announcement","Phase 2","Intercept"),
          ci = TRUE,
          ci.custom = list(mod.pooled.ci,
                           mod.one.ci,
                           mod.two.ci),
          df = FALSE, omit.stat = c("rsq","ser","f"),
          intercept.bottom = TRUE,
          notes = "", notes.append = FALSE, notes.label = "",
          column.labels = c("(Pooled)","(P1 Recruits)","(P2 Recruits)"),
          report = c('vcs'), model.numbers = FALSE
) #Date and time: Tue, Sep 25, 2018 - 12:05:07
```

## Table 3: Total number of reports submitted by each reporter during Phase 2

```{r tab3}
### Total number of reports submitted by each reporter during Phase 2 ####
###Total responses, cluster-robust SE
mod.pooled.clst <- lm_robust(total.responses ~ Responsiveness + recruitment + lc1.announce + Recruited.Phase2, data=ph2.reporters,
                             clusters = location.id)
mod.pooled.ci <- cbind(mod.pooled.clst$conf.low, mod.pooled.clst$conf.high)

mod.one.clst <- lm_robust(total.responses ~ Responsiveness + recruitment, data=ph2.reporters[ph2.reporters$Recruited.Phase2==0,],
                          clusters = location.id)
mod.one.ci <- cbind(mod.one.clst$conf.low, mod.one.clst$conf.high)

mod.two.clst <- lm_robust(total.responses ~ Responsiveness + recruitment + lc1.announce, data=ph2.reporters[ph2.reporters$Recruited.Phase2==1,],
                          clusters = location.id)
mod.two.ci <- cbind(mod.two.clst$conf.low, mod.two.clst$conf.high)

###Total responses, naive for model objects
mod.pooled <- lm(total.responses ~ Responsiveness + recruitment + lc1.announce + Recruited.Phase2, data=ph2.reporters)

mod.one <- lm(total.responses ~ Responsiveness + recruitment, data=ph2.reporters[ph2.reporters$Recruited.Phase2==0,])

mod.two <- lm(total.responses ~ Responsiveness + recruitment + lc1.announce, data=ph2.reporters[ph2.reporters$Recruited.Phase2==1,])

#Table
stargazer(mod.pooled, mod.one, mod.two, type = "latex", 
          dep.var.caption  = "DV: Total Number of Reports During Phase 2",
          dep.var.labels.include = FALSE,
          covariate.labels = c("Responsiveness","Neighbor Nomination","LC1 Nomination","LC1 Announcement","Phase 2","Intercept"),
          ci = TRUE,
          ci.custom = list(mod.pooled.ci,
                           mod.one.ci,
                           mod.two.ci),
          df = FALSE, omit.stat = c("rsq","ser","f"),
          intercept.bottom = TRUE,
          notes = "", notes.append = FALSE, notes.label = "",
          column.labels = c("(Pooled)","(P1 Recruits)","(P2 Recruits)"),
          report = c('vcs'), model.numbers = FALSE
) #Date and time: Tue, Sep 25, 2018 - 12:07:27

```

## Table 4: Number of reports submitted by each reporter during the last two weeks of Phase 2

```{r tab4}
### Number of reports submitted by each reporter during the last two weeks of Phase 2 ####
###Last two weeks, cluster-robust SE
mod.pooled.clst <- lm_robust(last2week.responses ~ Responsiveness + recruitment + lc1.announce + Recruited.Phase2, data=ph2.reporters,
                             clusters = location.id)
mod.pooled.ci <- cbind(mod.pooled.clst$conf.low, mod.pooled.clst$conf.high)

mod.one.clst <- lm_robust(last2week.responses ~ Responsiveness + recruitment, data=ph2.reporters[ph2.reporters$Recruited.Phase2==0,],
                          clusters = location.id)
mod.one.ci <- cbind(mod.one.clst$conf.low, mod.one.clst$conf.high)

mod.two.clst <- lm_robust(last2week.responses ~ Responsiveness + recruitment + lc1.announce, data=ph2.reporters[ph2.reporters$Recruited.Phase2==1,],
                          clusters = location.id)
mod.two.ci <- cbind(mod.two.clst$conf.low, mod.two.clst$conf.high)

###Last two weeks, naive for model objects
mod.pooled <- lm(last2week.responses ~ Responsiveness + recruitment + lc1.announce + Recruited.Phase2, data=ph2.reporters)

mod.one <- lm(last2week.responses ~ Responsiveness + recruitment, data=ph2.reporters[ph2.reporters$Recruited.Phase2==0,])

mod.two <- lm(last2week.responses ~ Responsiveness + recruitment + lc1.announce, data=ph2.reporters[ph2.reporters$Recruited.Phase2==1,])

#Table
stargazer(mod.pooled, mod.one, mod.two, type = "latex", 
          dep.var.caption  = "DV: Total Number of Reports During Last Two Weeks of Phase 2",
          dep.var.labels.include = FALSE,
          covariate.labels = c("Responsiveness","Neighbor Nomination","LC1 Nomination","LC1 Announcement","Phase 2","Intercept"),
          ci = TRUE,
          ci.custom = list(mod.pooled.ci,
                           mod.one.ci,
                           mod.two.ci),
          df = FALSE, omit.stat = c("rsq","ser","f"),
          intercept.bottom = TRUE,
          notes = "", notes.append = FALSE, notes.label = "",
          column.labels = c("(Pooled)","(P1 Recruits)","(P2 Recruits)"),
          report = c('vcs'), model.numbers = FALSE
) #Date and time: Tue, Sep 25, 2018 - 12:08:21
```

## Figure 4: Proportion of reporters responding to each prompt during Phase 2 by phase of recruitment

```{r fig4}
###Figure of reporting as a function of responsiveness condition ----
#Note: dividing all counts by number of reporters to form proportion reporting
#pdf("fig5-190305.pdf", width = 6, height=6)
qs <- c("Q1","Q2","Q3","Q4","Q5","Q6","Q7","Q8","Q9","Q10","Q11","Q12","Q13","Q14","Q15")
layout(matrix(1:3, ncol = 1))
par(mar = c(2, 6.1, 2, 2.1))

res1 <- apply(ph2.zones[ph2.zones$responsiveness==1,10:24], 2, sum) / sum(ph2.zones$total.reporters[ph2.zones$responsiveness==1])
res0 <- apply(ph2.zones[ph2.zones$responsiveness==0,10:24], 2, sum) / sum(ph2.zones$total.reporters[ph2.zones$responsiveness==0])
x <- 1:15
plot(x=x, y=res1, type="l", col="red", lwd=3, xlab="Question Number", ylab="Proportion Reporting", ylim=c(0,0.25), main="All Reporters", xaxt="n")
axis(1, at=1:15, labels=qs)
lines(x=x, y=res0, col="grey", lwd=3)

res1 <- apply(ph2.zones[ph2.zones$responsiveness==1 & ph2.zones$phase=="one", 10:24], 2, sum) / sum(ph2.zones$total.reporters[ph2.zones$responsiveness==1 & ph2.zones$phase=="one"])
res0 <- apply(ph2.zones[ph2.zones$responsiveness==0 & ph2.zones$phase=="one", 10:24], 2, sum) / sum(ph2.zones$total.reporters[ph2.zones$responsiveness==0 & ph2.zones$phase=="one"])
x <- 1:15
plot(x=x, y=res1, type="l", col="red", lwd=3, xlab="Question Number", ylab="Proportion Reporting", ylim=c(0,0.18), main="Phase 1 Recruits", xaxt="n")
axis(1, at=1:15, labels=qs)
lines(x=x, y=res0, col="grey", lwd=3)

res1 <- apply(ph2.zones[ph2.zones$responsiveness==1 & ph2.zones$phase=="two", 10:24], 2, sum) / sum(ph2.zones$total.reporters[ph2.zones$responsiveness==1 & ph2.zones$phase=="two"])
res0 <- apply(ph2.zones[ph2.zones$responsiveness==0 & ph2.zones$phase=="two", 10:24], 2, sum) / sum(ph2.zones$total.reporters[ph2.zones$responsiveness==0 & ph2.zones$phase=="two"])
x <- 1:15
plot(x=x, y=res1, type="l", col="red", lwd=3, xlab="Question Number", ylab="Proportion Reporting", ylim=c(0,0.3), main="Phase 2 Recruits",xaxt="n")
axis(1, at=1:15, labels=qs)
lines(x=x, y=res0, col="grey", lwd=3)
#dev.off()
```

## Figure 5: Attitudinal and behavioral responses to the Responsiveness treatment

```{r fig5}
ph2.endline <- read.csv("./ph2_endline_attitudes_survey.csv", stringsAsFactors=FALSE)

ph2.endline <- subset(ph2.endline, subject_id%in%ph2.reporters$Subject.ID)

for (i in 1:nrow(ph2.endline)){
  ph2.endline$responsiveness[i] <- ph2.reporters[ph2.reporters$Subject.ID==ph2.endline$subject_id[i],8]
  ph2.endline$recruitment.phase[i] <- ph2.reporters[ph2.reporters$Subject.ID==ph2.endline$subject_id[i],9]
  ph2.endline$location.id[i] <- ph2.reporters[ph2.reporters$Subject.ID==ph2.endline$subject_id[i],12]
}

prop.table(table(ph2.endline$responsiveness, ph2.endline$how_often_is_the_kcca_responsive_to_concerns_of_kampala_residents_),1)
prop.table(table(ph2.endline$responsiveness, ph2.endline$how_much_of_the_time_do_you_think_you_can_trust_the_kcca_to_do_what_is_right_),1)
prop.table(table(ph2.endline$responsiveness, ph2.endline$how_satisfied_are_you_with_rubbish_collection_services_in_your_zone_),1)

ph2.endline.one <- subset(ph2.endline, recruitment.phase==1)
prop.table(table(ph2.endline.one$responsiveness, ph2.endline.one$how_often_is_the_kcca_responsive_to_concerns_of_kampala_residents_),1)
prop.table(table(ph2.endline.one$responsiveness, ph2.endline.one$how_much_of_the_time_do_you_think_you_can_trust_the_kcca_to_do_what_is_right_),1)
prop.table(table(ph2.endline.one$responsiveness, ph2.endline.one$how_satisfied_are_you_with_rubbish_collection_services_in_your_zone_),1)

ph2.endline.two <- subset(ph2.endline, recruitment.phase==2)
prop.table(table(ph2.endline.two$responsiveness, ph2.endline.two$how_often_is_the_kcca_responsive_to_concerns_of_kampala_residents_),1)
prop.table(table(ph2.endline.two$responsiveness, ph2.endline.two$how_much_of_the_time_do_you_think_you_can_trust_the_kcca_to_do_what_is_right_),1)
prop.table(table(ph2.endline.two$responsiveness, ph2.endline.two$how_satisfied_are_you_with_rubbish_collection_services_in_your_zone_),1)

#Checking random assignment in ph2.reporters file
for (i in 1:nrow(ph2.endline)){
  ph2.endline$responsiveness.check[i] <- subset(ex.sample2, location.id==ph2.endline$location.id[i])[1,17]
}
ph2.endline.two <- subset(ph2.endline, recruitment.phase==2)
ph2.endline.two$assigned.comply <- ifelse(ph2.endline.two$responsiveness == ph2.endline.two$responsiveness.check, 1,0)
ph2.endline.two$assigned.comply[is.na(ph2.endline.two$assigned.comply)] <- 0

see <- subset(ph2.endline.two, assigned.comply==0)
unique(see$location.id)
#central.bukesa.kakato ii: it appears that wrong responsiveness treatment was administered
#makindye.makindye i.kipamba: out-of-sample replacement zone; deviation from sampling protocol
#rubaga.namirembe.mengo town: out-of-sample replacement zone; deviation from sampling protocol
#makindye.katwe i.musoke: out-of-sample replacement zone; deviation from sampling protocol
ph2.endline.two <- subset(ph2.endline.two, assigned.comply==1)
prop.table(table(ph2.endline.two$responsiveness, ph2.endline.two$how_often_is_the_kcca_responsive_to_concerns_of_kampala_residents_),1)
prop.table(table(ph2.endline.two$responsiveness, ph2.endline.two$how_much_of_the_time_do_you_think_you_can_trust_the_kcca_to_do_what_is_right_),1)
prop.table(table(ph2.endline.two$responsiveness, ph2.endline.two$how_satisfied_are_you_with_rubbish_collection_services_in_your_zone_),1)

for (i in 1:nrow(ph2.endline)){
  ph2.endline$responsiveness.check[i] <- subset(resp1.zones, location.id==ph2.endline$location.id[i])[1,11]
}
ph2.endline.one <- subset(ph2.endline, recruitment.phase==1)
ph2.endline.one$responsiveness == ph2.endline.one$responsiveness.check
#Note: all responsiveness assignments to Ph1 reporters check out

#To Do:check balance on pre-treatment covariates
for (i in 1:nrow(ph2.endline)){
  
  if(ph2.endline$recruitment.phase[i]==1){
    ph2.endline$waste.satisfaction.baseline[i] <- subset(ph1.reporters, mobile_number==ph2.endline$Phone.Number.Cleaned[i])[1,"satisfaction_rubbish_collectio"]
  }
  
  if(ph2.endline$recruitment.phase[i]==2){
    ph2.endline$waste.satisfaction.baseline[i] <- subset(p2.reporters, mobile_number==ph2.endline$Phone.Number.Cleaned[i])[1,"satisfaction_rubbish_collectio"]
  }
}

prop.table(table(ph2.endline$responsiveness, ph2.endline$waste.satisfaction.baseline),1)

ph2.endline$waste.satisfaction.baseline <- factor(ph2.endline$waste.satisfaction.baseline, levels=c("very_dissatisf","dissatisfied","refused_to_ans","neither_satisf","satisfied","very_satisfied"))

ph2.endline$responsive.num[ph2.endline$how_often_is_the_kcca_responsive_to_concerns_of_kampala_residents_=="almost_never"] <- 1
ph2.endline$responsive.num[ph2.endline$how_often_is_the_kcca_responsive_to_concerns_of_kampala_residents_=="only_some_of_t"] <- 2
ph2.endline$responsive.num[ph2.endline$how_often_is_the_kcca_responsive_to_concerns_of_kampala_residents_=="most_of_the_ti"] <- 3
ph2.endline$responsive.num[ph2.endline$how_often_is_the_kcca_responsive_to_concerns_of_kampala_residents_=="almost_always"] <- 4

ph2.endline$trust.num[ph2.endline$how_much_of_the_time_do_you_think_you_can_trust_the_kcca_to_do_what_is_right_=="almost_never"] <- 1
ph2.endline$trust.num[ph2.endline$how_much_of_the_time_do_you_think_you_can_trust_the_kcca_to_do_what_is_right_=="only_some_of_t"] <- 2
ph2.endline$trust.num[ph2.endline$how_much_of_the_time_do_you_think_you_can_trust_the_kcca_to_do_what_is_right_=="most_of_the_ti"] <- 3
ph2.endline$trust.num[ph2.endline$how_much_of_the_time_do_you_think_you_can_trust_the_kcca_to_do_what_is_right_=="almost_always"] <- 4

ph2.endline$satisfaction.num[ph2.endline$how_satisfied_are_you_with_rubbish_collection_services_in_your_zone_=="very_dissatisf"] <- 1
ph2.endline$satisfaction.num[ph2.endline$how_satisfied_are_you_with_rubbish_collection_services_in_your_zone_=="dissatisfied"] <- 2
ph2.endline$satisfaction.num[ph2.endline$how_satisfied_are_you_with_rubbish_collection_services_in_your_zone_=="neither_satisf"] <- 3
ph2.endline$satisfaction.num[ph2.endline$how_satisfied_are_you_with_rubbish_collection_services_in_your_zone_=="satisfied"] <- 4
ph2.endline$satisfaction.num[ph2.endline$how_satisfied_are_you_with_rubbish_collection_services_in_your_zone_=="very_satisfied"] <- 5

mod.responsive <- lm(responsive.num ~ responsiveness + waste.satisfaction.baseline, data=ph2.endline)
summary(mod.responsive)

mod.trust <- lm(trust.num ~ responsiveness + waste.satisfaction.baseline, data=ph2.endline)
summary(mod.trust)

mod.satisfaction <- lm(satisfaction.num ~ responsiveness + waste.satisfaction.baseline, data=ph2.endline)
summary(mod.satisfaction)

## Attrition by treatment condition
for (i in 1:nrow(ph2.reporters)){
  ph2.reporters$endline.attrition[i] <- ifelse(nrow(subset(ph2.endline, subject_id==ph2.reporters$Subject.ID[i] & Do_you_consent_to_this_intervi=="yes"))>0, 0, 1)
}

chisq.test(table(ph2.reporters$Responsiveness, ph2.reporters$endline.attrition))
prop.table(table(ph2.reporters$Responsiveness, ph2.reporters$endline.attrition),1) #No problems with differential attrition by responsiveness condition in prop.table

for (i in 1:nrow(ph2.reporters)){
  
  if(ph2.reporters$Recruitment.Phase[i]==1){
    ph2.reporters$waste.satisfaction.baseline[i] <- subset(ph1.reporters, mobile_number==ph2.reporters$Mobile.Number[i])[1,13]
  }
  
  if(ph2.reporters$Recruitment.Phase[i]==2){
    ph2.reporters$waste.satisfaction.baseline[i] <- subset(p2.reporters, mobile_number==ph2.reporters$Mobile.Number[i])[1,28]
  }
}

ph2.reporters$waste.satisfaction.baseline <- factor(ph2.reporters$waste.satisfaction.baseline, levels=c("very_dissatisf","dissatisfied","refused_to_ans","neither_satisf","satisfied","very_satisfied"))
mod <- lm(endline.attrition ~ Responsiveness*waste.satisfaction.baseline, data=ph2.reporters)
summary(mod) #No indication of differential attrition in baseline subgroups by treatment condition


###Inteded behavior
#ph2.endline$responsiveness <- ifelse(ph2.endline$subject_id %in% ph2.reporters$Subject.ID[ph2.reporters$Responsiveness==1], 1, 0)
ph2.endline.v.eligible <- subset(ph2.endline, Do_you_consent_to_this_intervi=="yes")

ph2.volunteer <- read.csv("./ph2_endline_volunteer_outcome1.csv", comment.char="#", na.strings="N/A", stringsAsFactors=FALSE)
ph2.volunteer.yes <- subset(ph2.volunteer, Data.Usable.=="Yes" & Cleaned.Data=="VOLUNTEER")

ph2.volunteer.p2 <- read.csv("./ph2_endline_volunteer_outcome2.csv", comment.char="#", na.strings="N/A", stringsAsFactors=FALSE)
ph2.volunteer.p2.yes <- subset(ph2.volunteer.p2, Data.Usable.=="Yes" & Cleaned.Data=="Volunteer")

###This is the analysis for the phone call only request, no text reminder
for (i in 1:nrow(ph2.endline.v.eligible)){
  sub <- subset(ph2.volunteer.yes, Subject.ID==ph2.endline.v.eligible$subject_id[i])
  ph2.endline.v.eligible$volunteered[i] <- ifelse(nrow(sub)>0,1,0)
}

table(ph2.endline.v.eligible$volunteered) #172
unique(ph2.volunteer.yes$Subject.ID)
length(unique(ph2.volunteer.yes$Subject.ID)) #180
unique(ph2.volunteer.yes$Subject.ID)[!(unique(ph2.volunteer.yes$Subject.ID) %in% ph2.endline.v.eligible$subject_id[ph2.endline.v.eligible$volunteered==1])]
#Note: see 10/18/2016 email about subject non-compliance

mod <- lm(volunteered ~ responsiveness, data=ph2.endline.v.eligible)
summary(mod)

###This is the analysis for the phone call only request, no text reminder
for (i in 1:nrow(ph2.endline)){
  sub <- subset(ph2.volunteer.yes, Subject.ID==ph2.endline$subject_id[i])
  sub2 <- subset(ph2.volunteer.p2.yes, Subject.ID==ph2.endline$subject_id[i])
  ph2.endline$volunteered[i] <- ifelse(nrow(sub)>0 | nrow(sub2)>0, 1, 0)
}

table(ph2.endline$volunteered) #263

mod <- lm(volunteered ~ responsiveness, data=ph2.endline)
summary(mod)

###Figure

##Subset toggle, doesn't work because of different objects
#ph2.endline <- subset(ph2.endline, recruitment.phase==1)
#ph2.endline.v.eligible <- subset(ph2.endline.v.eligible, recruitment.phase==1)

set.seed(101)
sims <- 10000

s.t <- mean(ph2.endline$satisfaction.num[ph2.endline$responsiveness==1], na.rm=T)
s.c <- mean(ph2.endline$satisfaction.num[ph2.endline$responsiveness==0], na.rm=T)

s.t.sims <- rep(NA, sims)
s.c.sims <- rep(NA, sims)

for (i in 1:sims){
  s.t.sims[i] <- mean(sample(ph2.endline$satisfaction.num[ph2.endline$responsiveness==1], replace=T), na.rm=T)
  s.c.sims[i] <- mean(sample(ph2.endline$satisfaction.num[ph2.endline$responsiveness==0], replace=T), na.rm=T)
}

r.t <- mean(ph2.endline$responsive.num[ph2.endline$responsiveness==1], na.rm=T)
r.c <- mean(ph2.endline$responsive.num[ph2.endline$responsiveness==0], na.rm=T)

r.t.sims <- rep(NA, sims)
r.c.sims <- rep(NA, sims)

for (i in 1:sims){
  r.t.sims[i] <- mean(sample(ph2.endline$responsive.num[ph2.endline$responsiveness==1], replace=T), na.rm=T)
  r.c.sims[i] <- mean(sample(ph2.endline$responsive.num[ph2.endline$responsiveness==0], replace=T), na.rm=T)
}

t.t <- mean(ph2.endline$trust.num[ph2.endline$responsiveness==1], na.rm=T)
t.c <- mean(ph2.endline$trust.num[ph2.endline$responsiveness==0], na.rm=T)

t.t.sims <- rep(NA, sims)
t.c.sims <- rep(NA, sims)

for (i in 1:sims){
  t.t.sims[i] <- mean(sample(ph2.endline$trust.num[ph2.endline$responsiveness==1], replace=T), na.rm=T)
  t.c.sims[i] <- mean(sample(ph2.endline$trust.num[ph2.endline$responsiveness==0], replace=T), na.rm=T)
}


v.t <- mean(ph2.endline.v.eligible$volunteered[ph2.endline.v.eligible$responsiveness==1], na.rm=T)
v.c <- mean(ph2.endline.v.eligible$volunteered[ph2.endline.v.eligible$responsiveness==0], na.rm=T)

v.t.sims <- rep(NA, sims)
v.c.sims <- rep(NA, sims)

for (i in 1:sims){
  v.t.sims[i] <- mean(sample(ph2.endline.v.eligible$volunteered[ph2.endline.v.eligible$responsiveness==1], replace=T), na.rm=T)
  v.c.sims[i] <- mean(sample(ph2.endline.v.eligible$volunteered[ph2.endline.v.eligible$responsiveness==0], replace=T), na.rm=T)
}

v2.t <- mean(ph2.endline$volunteered[ph2.endline$responsiveness==1], na.rm=T)
v2.c <- mean(ph2.endline$volunteered[ph2.endline$responsiveness==0], na.rm=T)

v2.t.sims <- rep(NA, sims)
v2.c.sims <- rep(NA, sims)

for (i in 1:sims){
  v2.t.sims[i] <- mean(sample(ph2.endline$volunteered[ph2.endline$responsiveness==1], replace=T), na.rm=T)
  v2.c.sims[i] <- mean(sample(ph2.endline$volunteered[ph2.endline$responsiveness==0], replace=T), na.rm=T)
}

#pdf("SolidWaste_TrustFigure_181010.pdf", height=6, width=6)
#pdf("SolidWaste_TrustFigure_Ph1Reporters_181010.pdf", height=6, width=6)
par(mfrow=c(6,1),oma=c(1,1,1,1),mar=c(3.4,2.1,1.1,2.1), mgp=c(2.2,0.8,0), bty="n", cex.lab=1.2, cex.axis=1.2)

plot(y=c(2.5,1.5),x=c(s.t,s.c), pch=c(16,1), cex=2.5, ylim=c(0.5,3.5),xlim=c(1,5), yaxt="n", ylab="", xlab="Satisfaction with Waste Services", xaxt="n", font.lab=2)
axis(1, at=c(1,2,3,4,5), labels=c("Very Dissatisfied","Dissastisfied","Neutral","Satisfied","Very Satisfied"))
lines(y=c(2.5,2.5),x=c(sort(s.t.sims)[250],sort(s.t.sims)[9750]), lwd=1.5)
lines(y=c(1.5,1.5),x=c(sort(s.c.sims)[250],sort(s.c.sims)[9750]), lwd=1.5)

plot(y=c(2.5,1.5),x=c(r.t,r.c), pch=c(16,1), cex=2.5, ylim=c(0.5,3.5),xlim=c(1,4), yaxt="n", ylab="", xlab="KCCA Responsiveness to Citizen Concerns", xaxt="n", font.lab=2)
axis(1, at=c(1,2,3,4), labels=c("Almost Never","Some of the Time","Most of the Time","Almost Always"))
lines(y=c(2.5,2.5),x=c(sort(r.t.sims)[250],sort(r.t.sims)[9750]), lwd=1.5)
lines(y=c(1.5,1.5),x=c(sort(r.c.sims)[250],sort(r.c.sims)[9750]), lwd=1.5)

plot(y=c(2.5,1.5),x=c(t.t,t.c), pch=c(16,1), cex=2.5, ylim=c(0.5,3.5),xlim=c(1,4), yaxt="n", ylab="", xlab="Trust in KCCA To Do the Right Thing", xaxt="n", font.lab=2)
axis(1, at=c(1,2,3,4), labels=c("Almost Never","Some of the Time","Most of the Time","Almost Always"))
lines(y=c(2.5,2.5),x=c(sort(t.t.sims)[250],sort(t.t.sims)[9750]), lwd=1.5)
lines(y=c(1.5,1.5),x=c(sort(t.c.sims)[250],sort(t.c.sims)[9750]), lwd=1.5)

plot(y=c(2.5,1.5),x=c(v.t,v.c), pch=c(16,1), cex=2.5, ylim=c(0.5,3.5),xlim=c(0,0.2), yaxt="n", ylab="", xlab="Proportion Volunteering to Help Develop Citizen Feedback Platform", font.lab=2)
lines(y=c(2.5,2.5),x=c(sort(v.t.sims)[250],sort(v.t.sims)[9750]), lwd=1.5)
lines(y=c(1.5,1.5),x=c(sort(v.c.sims)[250],sort(v.c.sims)[9750]), lwd=1.5)

plot(y=c(2.5,1.5),x=c(v2.t,v2.c), pch=c(16,1), cex=2.5, ylim=c(0.5,3.5),xlim=c(0,0.2), yaxt="n", ylab="", xlab="Propotion Volunteering (With SMS Reminder)", font.lab=2)
lines(y=c(2.5,2.5),x=c(sort(v2.t.sims)[250],sort(v2.t.sims)[9750]), lwd=1.5)
lines(y=c(1.5,1.5),x=c(sort(v2.c.sims)[250],sort(v2.c.sims)[9750]), lwd=1.5)

par(mar=c(0.1,0.1,0.1,0.1))
plot(1, type="n", xlim=c(0,10), ylim=c(0,2), xaxt="n", yaxt="n", ylab="", xlab="")
rect(xleft=1.2,xright=9,ybottom=1.25,ytop=1.8, col="azure2")
points(x=c(2.3,7),y=c(1.5,1.5), pch=c(16,21), bg="white", cex=2.5)
text(x=2.4, y=1.5, pos=4, "Responsiveness Treatment", cex=1.4)
text(x=7.1, y=1.5, pos=4, "Control", cex=1.4)
#dev.off()
```

## Table 5: Quality of responses by treatment condition

```{r tab5}
### Analyzing type of response (discarded, usable, usable raw) by reporter ----

### Phase 1 ----

##Creating DVs
discard.coded <- function(vector.raw,vector.clean){
  discarded <- !is.na(vector.raw) & is.na(vector.clean)
  return(discarded)
} #Did raw data need to be discarded? TRUE/FALSE (TRUE is worse)

usable.cleaning.coded <- function(vector.raw,vector.clean){
  cleaning <- !(vector.raw==vector.clean) & !is.na(vector.clean)
  return(cleaning)
} #Is the response usable after cleaning? TRUE/FALSE (TRUE is worse)

usable.raw.coded <- function(vector.raw,vector.clean){
  raw <- (vector.raw==vector.clean) & !is.na(vector.raw) & !is.na(vector.clean)
  return(raw)
} #Is the raw data usable? TRUE/FALSE (TRUE is better)

# discard.coded(ph1$question_1,ph1$q1.cleaned)
# usable.cleaning.coded(ph1$question_1,ph1$q1.cleaned)
# usable.raw.coded(ph1$question_1,ph1$q1.cleaned)
# 
# sum(discard.coded(ph1$question_1,ph1$q1.cleaned)) #3
# sum(usable.cleaning.coded(ph1$question_1,ph1$q1.cleaned)) #4
# sum(usable.raw.coded(ph1$question_1,ph1$q1.cleaned)) #29
# sum(!is.na(ph1$question_1)) #36, all checks out

#Note: Q1 is a language preference, rather than a substantive poll
#See: Phase1_OutgoingPrompts.csv for final times/dates of outgoing prompts

cleaned.vector<-c(6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57)
raw.vector <- cleaned.vector - 1

for (i in 2:length(cleaned.vector)){
  output$store.discard <- discard.coded(vector.clean=output[,cleaned.vector[i]],vector.raw=output[,cleaned.vector[i]-1])
  names(output)[ncol(output)] <- paste("q",i,".discarded.coded",sep="")
}

for (i in 2:length(cleaned.vector)){
  output$store.cleaned <- usable.cleaning.coded(vector.clean=output[,cleaned.vector[i]],vector.raw=output[,cleaned.vector[i]-1])
  names(output)[ncol(output)] <- paste("q",i,".cleaned.coded",sep="")
}

for (i in 2:length(cleaned.vector)){
  output$store.raw <- usable.raw.coded(vector.clean=output[,cleaned.vector[i]],vector.raw=output[,cleaned.vector[i]-1])
  names(output)[ncol(output)] <- paste("q",i,".raw.coded",sep="")
}

var.discarded.select <- seq(from=79, to=95, by=1)
var.cleaned.select <- seq(from=96, to=112, by=1)
var.raw.select <- seq(from=113, to=129)

output$raw.responses <- rowSums(!is.na(output[,raw.vector[2:length(raw.vector)]])) #Eliminating Q1 language response

output$discarded.responses <- rowSums(output[,var.discarded.select], na.rm = TRUE)
output$usable.cleaned <- rowSums(output[,var.cleaned.select], na.rm = TRUE)
output$usable.raw <- rowSums(output[,var.raw.select], na.rm = TRUE)

# sum(c(sum(output$discarded.responses),sum(output$usable.cleaned),sum(output$usable.raw))) #checked: 655

#Total responses, models
mod.discarded.clst <- lm_robust(discarded.responses ~ Nominated, data=output, clusters = Vname)
mod.discarded.ci <- cbind(mod.discarded.clst$conf.low, mod.discarded.clst$conf.high)

mod.cleaned.clst <- lm_robust(usable.cleaned ~ Nominated, data=output, clusters = Vname)
mod.cleaned.ci <- cbind(mod.cleaned.clst$conf.low, mod.cleaned.clst$conf.high)

mod.raw.clst <- lm_robust(usable.raw ~ Nominated, data=output, clusters = Vname)
mod.raw.ci <- cbind(mod.raw.clst$conf.low, mod.raw.clst$conf.high)

###Total responses, naive for model objects
mod.discarded <- lm(discarded.responses ~ Nominated, data=output)
mod.cleaned <- lm(usable.cleaned ~ Nominated, data=output)
mod.raw <- lm(usable.raw ~ Nominated, data=output)

#Table, top part
stargazer(mod.discarded, mod.cleaned, mod.raw, type = "latex",
          dep.var.labels.include = FALSE,
          covariate.labels = c("Neighbor Nomination"),
          ci = TRUE,
          ci.custom = list(mod.discarded.ci,
                           mod.cleaned.ci,
                           mod.raw.ci),
          df = FALSE, omit.stat = c("rsq","ser","f"),
          notes = "", notes.append = FALSE, notes.label = "",
          column.labels = c("Discarded","Usable After Cleaning","Usable Raw"),
          report = c('vcs'), model.numbers = FALSE
)


### Phase 2 ----

##Data setup
ph2.cleaned <- ph2[ph2$out.sample.replacement==0,]
ph2.cleaned$usable <- ifelse(ph2.cleaned$Data.Usable.mb=="Yes",1,0)
ph2.cleaned$discarded <- ifelse(ph2.cleaned$Data.Usable.mb=="No",1,0)
ph2.cleaned$original <- substring(ph2.cleaned$Original.Response, 6)
ph2.cleaned$cleaned <- ifelse(ph2.cleaned$original!=ph2.cleaned$Cleaned.Response,1,0)
ph2.cleaned$cleaned <- ifelse(!is.na(ph2.cleaned$original) & ph2.cleaned$Data.Usable.mb=="No", 1, ph2.cleaned$cleaned)

ph2.cleaned$usable.raw <- ifelse(ph2.cleaned$Data.Usable.mb=="Yes" & ph2.cleaned$original==ph2.cleaned$Cleaned.Response,1,0)
ph2.cleaned$usable.cleaned <- ifelse(ph2.cleaned$Data.Usable.mb=="Yes" & ph2.cleaned$original!=ph2.cleaned$Cleaned.Response,1,0)

#table(ph2.cleaned$discarded)
#table(ph2.cleaned$usable.cleaned)
#table(ph2.cleaned$usable.raw) #checked: responses add to total row number in ph2.cleaned

#Re-compiling total messages of each type:

for (i in 1:nrow(ph2.reporters)){
  ph2.reporters$discarded.responses[i] <- nrow(subset(ph2.cleaned, Subject.ID==ph2.reporters$Subject.ID[i] & discarded==1))
  ph2.reporters$usable.cleaned[i] <- nrow(subset(ph2.cleaned, Subject.ID==ph2.reporters$Subject.ID[i] & usable.cleaned==1))
  ph2.reporters$usable.raw[i] <- nrow(subset(ph2.cleaned, Subject.ID==ph2.reporters$Subject.ID[i] & usable.raw==1))
}

# sum(ph2.reporters$discarded.responses)
# sum(ph2.reporters$usable.cleaned)
# sum(ph2.reporters$usable.raw) #check: all responses in commented tables above have compiled correctly


###Responses of different types, cluster-robust SE
mod.discarded.clst <- lm_robust(discarded.responses ~ Responsiveness + recruitment + lc1.announce + Recruited.Phase2, data=ph2.reporters,
                             clusters = location.id)
mod.discarded.ci <- cbind(mod.discarded.clst$conf.low, mod.discarded.clst$conf.high)

mod.cleaned.clst <- lm_robust(usable.cleaned ~ Responsiveness + recruitment + lc1.announce + Recruited.Phase2, data=ph2.reporters,
                          clusters = location.id)
mod.cleaned.ci <- cbind(mod.cleaned.clst$conf.low, mod.cleaned.clst$conf.high)

mod.raw.clst <- lm_robust(usable.raw ~ Responsiveness + recruitment + lc1.announce + Recruited.Phase2, data=ph2.reporters,
                          clusters = location.id)
mod.raw.ci <- cbind(mod.raw.clst$conf.low, mod.raw.clst$conf.high)

###Total responses, naive for model objects
mod.discarded <- lm(discarded.responses ~ Responsiveness + recruitment + lc1.announce + Recruited.Phase2, data=ph2.reporters)

mod.cleaned <- lm(usable.cleaned ~ Responsiveness + recruitment + lc1.announce + Recruited.Phase2, data=ph2.reporters)

mod.raw <- lm(usable.raw ~ Responsiveness + recruitment + lc1.announce + Recruited.Phase2, data=ph2.reporters)

#Table, bottom part
stargazer(mod.discarded, mod.cleaned, mod.raw, type = "latex",
          dep.var.labels.include = FALSE,
          covariate.labels = c("Responsiveness","Neighbor Nomination","LC1 Nomination","LC1 Announcement"),
          ci = TRUE,
          ci.custom = list(mod.discarded.ci,
                           mod.cleaned.ci,
                           mod.raw.ci),
          df = FALSE, omit.stat = c("rsq","ser","f"),
          notes = "", notes.append = FALSE, notes.label = "",
          column.labels = c("Discarded","Usable After Cleaning","Usable Raw"),
          report = c('vcs'), model.numbers = FALSE
)
```

## Figure D1: Results from the Phase 2 Experiment using difference-in-means for each treatment arm independently

```{r pap-diff-means-ph2}

source("multiplot.R")

##Responsiveness
a<-data.frame(prop.table(table(ph2.reporters$Responsiveness,ph2.reporters$active.ever),1))[3:4,]
names(a) <- c("Responsiveness","active.ever","prop")
a<-within(a, Responsiveness <- factor(Responsiveness, levels=c(1,0)))
a$Responsiveness <- revalue(a$Responsiveness, c("1"="Responsivness","0"="Control"))
ctl.means <- rep(NA,1000)
resp.means <- rep(NA,1000)
table(ph2.reporters$Responsiveness,ph2.reporters$active.ever) #Used to get SE bars in loop below
set.seed(201)
for (i in 1:1000){
  ctl.means[i] <- mean(sample(c(rep(0,982),rep(1,394)), replace=T))
  resp.means[i] <- mean(sample(c(rep(0,968),rep(1,522)), replace=T))
} 
a$se <- c(sd(ctl.means),sd(resp.means))
se.bars <- aes(ymax = a$prop + a$se, ymin = a$prop - a$se)
ever.resp <- ggplot(data=a, aes(x=Responsiveness, y=prop)) + geom_bar(stat="identity") + geom_errorbar(se.bars, width=0.3) + ylab("Proportion Ever Responding") + ggtitle("(A) Ever Responding") + theme(plot.title = element_text(lineheight=1, face="bold"))


total.responses <- c(sum(ph2.reporters$total.responses[ph2.reporters$Responsiveness==0]),
               sum(ph2.reporters$total.responses[ph2.reporters$Responsiveness==1]))
reporters <- as.numeric(table(ph2.reporters$Responsiveness))
b <- data.frame(total.responses,reporters)
row.names(b) <- c("Control","Responsiveness")
b$avg.total.responses <- b$total.responses/b$reporters
b$Responsiveness <- factor(row.names(b), levels=c("Responsiveness","Control"))
ctl.means <- rep(NA,1000)
resp.means <- rep(NA,1000)
set.seed(201)
for (i in 1:1000){
  ctl.means[i] <- mean(sample(ph2.reporters$total.responses[ph2.reporters$Responsiveness==0], replace=T))
  resp.means[i] <- mean(sample(ph2.reporters$total.responses[ph2.reporters$Responsiveness==1], replace=T))
}
b$se <- c(sd(ctl.means),sd(resp.means))
se.bars.b <- aes(ymax = b$avg.total.responses + b$se, ymin = b$avg.total.responses - b$se)
avg.resp <- ggplot(data=b, aes(x=Responsiveness, y=avg.total.responses)) + geom_bar(stat="identity") + geom_errorbar(se.bars.b, width=0.3) + ylab("Total Responses Per Reporter") + ggtitle("(B) Average Responses") + theme(plot.title = element_text(lineheight=1, face="bold"))


open.responses <- c(sum(ph2.reporters$last2week.responses[ph2.reporters$Responsiveness==0]),
                     sum(ph2.reporters$last2week.responses[ph2.reporters$Responsiveness==1]))
reporters <- as.numeric(table(ph2.reporters$Responsiveness))
c <- data.frame(open.responses,reporters)
row.names(c) <- c("Control","Responsiveness")
c$avg.open.responses <- c$open.responses/c$reporters
c$Responsiveness <- factor(row.names(c), levels=c("Responsiveness","Control"))
ctl.means <- rep(NA,1000)
resp.means <- rep(NA,1000)
set.seed(201)
for (i in 1:1000){
  ctl.means[i] <- mean(sample(ph2.reporters$last2week.responses[ph2.reporters$Responsiveness==0], replace=T))
  resp.means[i] <- mean(sample(ph2.reporters$last2week.responses[ph2.reporters$Responsiveness==1], replace=T))
}
c$se <- c(sd(ctl.means),sd(resp.means))
se.bars.c <- aes(ymax = c$avg.open.responses + c$se, ymin = c$avg.open.responses - c$se)
open.resp <- ggplot(data=c, aes(x=Responsiveness, y=avg.open.responses)) + geom_bar(stat="identity") + geom_errorbar(se.bars.c, width=0.3) + ylab("Open Responses Per Reporter") + ggtitle("(C) Open Responses") + theme(plot.title = element_text(lineheight=1, face="bold"))


##LC1 Announcement
d<-data.frame(prop.table(table(ph2.p2.reporters$lc1.announce,ph2.p2.reporters$active.ever),1))[3:4,]
names(d) <- c("lc1.announce","active.ever","prop")
d<-within(d, lc1.announce <- factor(lc1.announce, levels=c(1,0)))
d$lc1.announce <- revalue(d$lc1.announce, c("1"="LC1 Announcement","0"="Control"))
ctl.means <- rep(NA,1000)
ann.means <- rep(NA,1000)
table(ph2.p2.reporters$lc1.announce,ph2.p2.reporters$active.ever) #Used to get SE bars in loop below
set.seed(201)
for (i in 1:1000){
  ctl.means[i] <- mean(sample(c(rep(0,596),rep(1,342)), replace=T))
  ann.means[i] <- mean(sample(c(rep(0,562),rep(1,345)), replace=T))
} 
d$se <- c(sd(ctl.means),sd(ann.means))
se.bars.d <- aes(ymax = d$prop + d$se, ymin = d$prop - d$se)
ever.ann <- ggplot(data=d, aes(x=lc1.announce, y=prop)) + geom_bar(stat="identity") + geom_errorbar(se.bars.d, width=0.3) + ylab("Proportion Ever Responding") + xlab("LC1 Announcement") + ggtitle("(D) Ever Responding") + theme(plot.title = element_text(lineheight=1, face="bold"))


total.responses <- c(sum(ph2.p2.reporters$total.responses[ph2.p2.reporters$lc1.announce==0]),
                     sum(ph2.p2.reporters$total.responses[ph2.p2.reporters$lc1.announce==1]))
reporters <- as.numeric(table(ph2.p2.reporters$lc1.announce))
e <- data.frame(total.responses,reporters)
row.names(e) <- c("Control","LC1 Announcement")
e$avg.total.responses <- e$total.responses/e$reporters
e$lc1.announce <- factor(row.names(e), levels=c("LC1 Announcement","Control"))
ctl.means <- rep(NA,1000)
ann.means <- rep(NA,1000)
set.seed(201)
for (i in 1:1000){
  ctl.means[i] <- mean(sample(ph2.p2.reporters$total.responses[ph2.p2.reporters$lc1.announce==0], replace=T))
  ann.means[i] <- mean(sample(ph2.p2.reporters$total.responses[ph2.p2.reporters$lc1.announce==1], replace=T))
}
e$se <- c(sd(ctl.means),sd(ann.means))
se.bars.e <- aes(ymax = e$avg.total.responses + e$se, ymin = e$avg.total.responses - e$se)
avg.ann <- ggplot(data=e, aes(x=lc1.announce, y=avg.total.responses)) + geom_bar(stat="identity") + geom_errorbar(se.bars.e, width=0.3) + ylab("Total Responses Per Reporter") + xlab("LC1 Announcement") + ggtitle("(E) Average Responses") + theme(plot.title = element_text(lineheight=1, face="bold"))


open.responses <- c(sum(ph2.p2.reporters$last2week.responses[ph2.p2.reporters$lc1.announce==0]),
                    sum(ph2.p2.reporters$last2week.responses[ph2.p2.reporters$lc1.announce==1]))
reporters <- as.numeric(table(ph2.p2.reporters$lc1.announce))
f <- data.frame(open.responses,reporters)
row.names(f) <- c("Control","LC1 Announcement")
f$avg.open.responses <- f$open.responses/f$reporters
f$lc1.announce <- factor(row.names(f), levels=c("LC1 Announcement","Control"))
ctl.means <- rep(NA,1000)
ann.means <- rep(NA,1000)
set.seed(201)
for (i in 1:1000){
  ctl.means[i] <- mean(sample(ph2.p2.reporters$last2week.responses[ph2.p2.reporters$lc1.announce==0], replace=T))
  ann.means[i] <- mean(sample(ph2.p2.reporters$last2week.responses[ph2.p2.reporters$lc1.announce==1], replace=T))
}
f$se <- c(sd(ctl.means),sd(ann.means))
se.bars.f <- aes(ymax = f$avg.open.responses + f$se, ymin = f$avg.open.responses - f$se)
open.ann <- ggplot(data=f, aes(x=lc1.announce, y=avg.open.responses)) + geom_bar(stat="identity") + geom_errorbar(se.bars.f, width=0.3) + ylab("Open Responses Per Reporter") + xlab("LC1 Announcement") + ggtitle("(F) Open Responses") + theme(plot.title = element_text(lineheight=1, face="bold"))


##LC1 Nomination
g<-data.frame(prop.table(table(ph2.p2.reporters$recruitment,ph2.p2.reporters$active.ever),1))[c(4,6),]
names(g) <- c("recruitment","active.ever","prop")
g<-within(g, recruitment <- factor(recruitment, levels=c("lc1.recruit","random")))
g$recruitment <- revalue(g$recruitment, c("lc1.recruit"="LC1 Recruitment","random"="Street Recruitment"))
ctl.means <- rep(NA,1000)
rec.means <- rep(NA,1000)
table(ph2.p2.reporters$recruitment,ph2.p2.reporters$active.ever) #Used to get SE bars in loop below
set.seed(201)
for (i in 1:1000){
  ctl.means[i] <- mean(sample(c(rep(0,573),rep(1,345)), replace=T))
  rec.means[i] <- mean(sample(c(rep(0,585),rep(1,342)), replace=T))
} 
g$se <- c(sd(ctl.means),sd(rec.means))
se.bars.g <- aes(ymax = g$prop + g$se, ymin = g$prop - g$se)
ever.rec <- ggplot(data=g, aes(x=recruitment, y=prop)) + geom_bar(stat="identity") + geom_errorbar(se.bars.g, width=0.3) + ylab("Proportion Ever Responding") + xlab("LC1 Recruitment") + ggtitle("(G) Ever Responding") + theme(plot.title = element_text(lineheight=1, face="bold"))


total.responses <- c(sum(ph2.p2.reporters$total.responses[ph2.p2.reporters$recruitment=="random"]),
                     sum(ph2.p2.reporters$total.responses[ph2.p2.reporters$recruitment=="lc1.recruit"]))
reporters <- as.numeric(table(ph2.p2.reporters$recruitment))[c(1,3)]
h <- data.frame(total.responses,reporters)
row.names(h) <- c("Street Recruitment","LC1 Recruitment")
h$avg.total.responses <- h$total.responses/h$reporters
h$recruitment <- factor(row.names(h), levels=c("LC1 Recruitment","Street Recruitment"))
ctl.means <- rep(NA,1000)
rec.means <- rep(NA,1000)
set.seed(201)
for (i in 1:1000){
  ctl.means[i] <- mean(sample(ph2.p2.reporters$total.responses[ph2.p2.reporters$recruitment=="random"], replace=T))
  rec.means[i] <- mean(sample(ph2.p2.reporters$total.responses[ph2.p2.reporters$recruitment=="lc1.recruit"], replace=T))
}
h$se <- c(sd(ctl.means),sd(rec.means))
se.bars.h <- aes(ymax = h$avg.total.responses + h$se, ymin = h$avg.total.responses - h$se)
avg.rec <- ggplot(data=h, aes(x=recruitment, y=avg.total.responses)) + geom_bar(stat="identity") + geom_errorbar(se.bars.h, width=0.3) + ylab("Total Responses Per Reporter") + xlab("LC1 Recruitment") + ggtitle("(H) Average Responses") + theme(plot.title = element_text(lineheight=1, face="bold"))


open.responses <- c(sum(ph2.p2.reporters$last2week.responses[ph2.p2.reporters$recruitment=="random"]),
                    sum(ph2.p2.reporters$last2week.responses[ph2.p2.reporters$recruitment=="lc1.recruit"]))
reporters <- as.numeric(table(ph2.p2.reporters$recruitment))[c(1,3)]
i <- data.frame(open.responses,reporters)
row.names(i) <- c("Street Recruitment","LC1 Recruitment")
i$avg.open.responses <- i$open.responses/i$reporters
i$recruitment <- factor(row.names(i), levels=c("LC1 Recruitment","Street Recruitment"))
ctl.means <- rep(NA,1000)
rec.means <- rep(NA,1000)
set.seed(201)
for (k in 1:1000){
  ctl.means[k] <- mean(sample(ph2.p2.reporters$last2week.responses[ph2.p2.reporters$recruitment=="random"], replace=T))
  rec.means[k] <- mean(sample(ph2.p2.reporters$last2week.responses[ph2.p2.reporters$recruitment=="lc1.recruit"], replace=T))
}
i$se <- c(sd(ctl.means),sd(rec.means))
se.bars.i <- aes(ymax = i$avg.open.responses + i$se, ymin = i$avg.open.responses - i$se)
open.rec <- ggplot(data=i, aes(x=recruitment, y=avg.open.responses)) + geom_bar(stat="identity") + geom_errorbar(se.bars.i, width=0.3) + ylab("Open Responses Per Reporter") + xlab("LC1 Recruitment") + ggtitle("(I) Open Responses") + theme(plot.title = element_text(lineheight=1, face="bold"))


#pdf("Provision_CombinedPhase2Results_181004.pdf",width=10,height=10)
multiplot(ever.resp,ever.ann,ever.rec,
          avg.resp,avg.ann,avg.rec,
          open.resp,open.ann,open.rec,
          cols=3)
#dev.off()
```

### Phase 1 Hypothsis 2 (reported only in Appendix D text)

```{r ph1-h2-pap}
##H2: Of reporters who respond to at least one prompt in the first two weeks of the experiment, fewer nominated reporters will discontinue reporting than randomly recruited reporters, measured as a lack of reporting for at least two weeks that continues through the end of the 8-week experiment.

output$active.last2weeks #DV

for (i in 1:nrow(output)){
  output$first2weeks.responses[i] <- sum(!is.na(output)[i,c("q2.cleaned","q3.cleaned","q4.cleaned","q5.cleaned","q6.cleaned")])
} 

sub <- subset(output, first2weeks.responses>0) #118 reporters

mean(sub$active.last2weeks[sub$assignment=="nomination"])
mean(sub$active.last2weeks[sub$assignment=="random"])
discontinue <- mean(sub$active.last2weeks[sub$assignment=="nomination"]) - mean(sub$active.last2weeks[sub$assignment=="random"])
discontinue

mod.discontinue <- lm(active.last2weeks ~ assignment, data=sub)
summary(mod.discontinue)
```

## Table E1: CACE for LC1 Announcement Condition with missing compliance data dropped

```{r comp-setup, message=FALSE}
library(AER) #version 1.2-7

ph2.reporters.only <- ph2.reporters[ph2.reporters$Recruitment.Phase==2,]
lc1.compliance <- read.csv("./ph2_lc1_announcement_survey.csv", na.strings="N/A", stringsAsFactors=FALSE)
lc1.compliance$location.id <- paste(tolower(lc1.compliance$Division),tolower(lc1.compliance$Parish),tolower(lc1.compliance$VNAME),sep=".")
#no.match.list <- lc1.compliance$location.id[!(lc1.compliance$location.id %in% ph2.reporters.only$location.id)]
announce.short <- lc1.compliance[,c(11,16)]

ph2.reporters.compl <- merge(ph2.reporters.only,announce.short, by="location.id", all.x=TRUE)
#table(ph2.reporters.compl$announcement.cleaned, ph2.reporters.compl$lc1.announce)

#Assuming only one-sided non-compliance
ph2.reporters.compl$announcement.cleaned <- ifelse(ph2.reporters.compl$lc1.announce==0,"no",ph2.reporters.compl$announcement.cleaned)
ph2.reporters.compl$announcement.upper <- ifelse(is.na(ph2.reporters.compl$announcement.cleaned),"yes",ph2.reporters.compl$announcement.cleaned)
ph2.reporters.compl$announcement.lower <- ifelse(is.na(ph2.reporters.compl$announcement.cleaned),"no",ph2.reporters.compl$announcement.cleaned)
```

```{r tab-e1}
###Table E1. CACE for LC1 Announcement Condition with missing compliance data dropped

##estimtr, for CIs
mod1 <- iv_robust(total.responses ~ Responsiveness + recruitment + announcement.cleaned | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl, clusters = location.id)
mod1.ci <- cbind(mod1$conf.low,mod1$conf.high)

mod4 <- iv_robust(active.ever ~ Responsiveness + recruitment + announcement.cleaned | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl, clusters = location.id)
mod4.ci <- cbind(mod4$conf.low,mod4$conf.high)

mod7 <- iv_robust(last2week.responses ~ Responsiveness + recruitment + announcement.cleaned | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl, clusters = location.id)
mod7.ci <- cbind(mod7$conf.low,mod7$conf.high)

##AER, for stargazer
mod1 <- ivreg(total.responses ~ Responsiveness + recruitment + announcement.cleaned | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl)
#summary(mod1, vcov = sandwich, diagnostics = TRUE)

mod4 <- ivreg(active.ever ~ Responsiveness + recruitment + announcement.cleaned | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl)
#summary(mod4, vcov = sandwich, diagnostics = TRUE)

mod7 <- ivreg(last2week.responses ~ Responsiveness + recruitment + announcement.cleaned | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl)
#summary(mod7, vcov = sandwich, diagnostics = TRUE)


stargazer(mod1,mod4,mod7, type="latex",
          df = FALSE, omit.stat = c("rsq","ser","f"),
          dep.var.labels = c("Total Responses","Active Ever","Last 2 Week Responses"),
          notes = "", notes.append = FALSE, notes.label = "",
          covariate.labels = c("Responsiveness","LC1 Nomination","LC1 Announcement","Intercept"),
          dep.var.caption  = "Procedure for Missing Compliance Data: Dropped",
          intercept.bottom = TRUE,
          ci = TRUE,
          ci.custom = list(mod1.ci,
                           mod4.ci,
                           mod7.ci),
          report = c('vcs'), model.numbers = FALSE
) #Date and time: Mon, Oct 08, 2018 - 10:37:55
```

## Table E2: CACE for LC1 Announcement Condition with missing compliance data assumed to be in compliance

```{r tab-e2}
###Table E2. CACE for LC1 Announcement Condition with missing compliance data assumed to be in compliance

##estimtr, for CIs
mod2 <- iv_robust(total.responses ~ Responsiveness + recruitment + announcement.upper | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl, clusters = location.id)
mod2.ci <- cbind(mod2$conf.low,mod2$conf.high)

mod5 <- iv_robust(active.ever ~ Responsiveness + recruitment + announcement.upper | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl, clusters = location.id)
mod5.ci <- cbind(mod5$conf.low,mod5$conf.high)

mod8 <- iv_robust(last2week.responses ~ Responsiveness + recruitment + announcement.upper | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl, clusters = location.id)
mod8.ci <- cbind(mod8$conf.low,mod8$conf.high)

##AER, for stargazer
mod2 <- ivreg(total.responses ~ Responsiveness + recruitment + announcement.upper | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl)
#summary(mod2, vcov = sandwich, diagnostics = TRUE)

mod5 <- ivreg(active.ever ~ Responsiveness + recruitment + announcement.upper | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl)
#summary(mod5, vcov = sandwich, diagnostics = TRUE)

mod8 <- ivreg(last2week.responses ~ Responsiveness + recruitment + announcement.upper | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl)
#summary(mod8, vcov = sandwich, diagnostics = TRUE)


stargazer(mod2,mod5,mod8, type="latex",
          df = FALSE, omit.stat = c("rsq","ser","f"),
          dep.var.labels = c("Total Responses","Active Ever","Last 2 Week Responses"),
          notes = "", notes.append = FALSE, notes.label = "",
          covariate.labels = c("Responsiveness","LC1 Nomination","LC1 Announcement","Intercept"),
          dep.var.caption  = "Procedure for Missing Compliance Data: Upper Bound",
          intercept.bottom = TRUE,
          ci = TRUE,
          ci.custom = list(mod2.ci,
                           mod5.ci,
                           mod8.ci),
          report = c('vcs'), model.numbers = FALSE
) #Date and time: Mon, Oct 08, 2018 - 10:38:46
```

## Table E3: CACE for LC1 Announcement Condition with missing compliance data assumed to be out of compliance

```{r tab-e3}
###Table E3. CACE for LC1 Announcement Condition with missing compliance data assumed to be out of compliance

##estimtr, for CIs
mod3 <- iv_robust(total.responses ~ Responsiveness + recruitment + announcement.lower | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl, clusters = location.id)
mod3.ci <- cbind(mod3$conf.low,mod3$conf.high)

mod6 <- iv_robust(active.ever ~ Responsiveness + recruitment + announcement.lower | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl, clusters = location.id)
mod6.ci <- cbind(mod6$conf.low,mod6$conf.high)

mod9 <- iv_robust(last2week.responses ~ Responsiveness + recruitment + announcement.lower | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl, clusters = location.id)
mod9.ci <- cbind(mod9$conf.low,mod9$conf.high)

##AER, for stargazer
mod3 <- ivreg(total.responses ~ Responsiveness + recruitment + announcement.lower | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl)
#summary(mod3, vcov = sandwich, diagnostics = TRUE)

mod6 <- ivreg(active.ever ~ Responsiveness + recruitment + announcement.lower | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl)
#summary(mod6, vcov = sandwich, diagnostics = TRUE)

mod9 <- ivreg(last2week.responses ~ Responsiveness + recruitment + announcement.lower | Responsiveness + recruitment + lc1.announce, data=ph2.reporters.compl)
#summary(mod9, vcov = sandwich, diagnostics = TRUE)


stargazer(mod3,mod6,mod9, type="latex",
          df = FALSE, omit.stat = c("rsq","ser","f"),
          dep.var.labels = c("Total Responses","Active Ever","Last 2 Week Responses"),
          notes = "", notes.append = FALSE, notes.label = "",
          covariate.labels = c("Responsiveness","LC1 Nomination","LC1 Announcement","Intercept"),
          dep.var.caption  = "Procedure for Missing Compliance Data: Lower Bound",
          intercept.bottom = TRUE,
          ci = TRUE,
          ci.custom = list(mod3.ci,
                           mod6.ci,
                           mod9.ci),
          report = c('vcs'), model.numbers = FALSE
) #Date and time: Mon, Oct 08, 2018 - 10:41:15
```

## Table F1: Parameter estimates for model F1

```{r tab-social-norm}
##Effect on number of messages from previous week on reporting behavior ----

##Setup ----
library(reshape2) #version 1.4.3
#Note: ph2.reporters has to be fully formed in setup block

sub.pan <- ph2.reporters[,c(1,12,8,24,29:43)]
ph2.panel <- melt(sub.pan, id.vars=c("Subject.ID","location.id","Responsiveness","direct"),
                  measure.vars = c("q1.response","q2.response","q3.response","q4.response","q5.response","q6.response","q7.response","q8.response","q9.response","q10.response","q11.response","q12.response","q13.response","q14.response","q15.response"),
                  variable.name = "question.char", value.name = "response")
ph2.panel$question <- sort(rep(1:15,nrow(sub.pan)))
ph2.panel$week <- sort(rep(c(1,1,2,2,3,3,4,4,5,5,6,6,7,8,8),nrow(sub.pan)))
ph2.panel$response <- as.numeric(ph2.panel$response)


sub.pan.all <- ph2.reporters[,c(1,12,8,24,59:73)]
ph2.panel.all <- melt(sub.pan.all, id.vars=c("Subject.ID","location.id","Responsiveness","direct"),
                  measure.vars = c("q1.responses.all","q2.responses.all","q3.responses.all","q4.responses.all","q5.responses.all","q6.responses.all","q7.responses.all","q8.responses.all","q9.responses.all","q10.responses.all","q11.responses.all","q12.responses.all","q13.responses.all","q14.responses.all","q15.responses.all"),
                  variable.name = "question.char", value.name = "response")
ph2.panel$responses.all <- ph2.panel.all$response

sub.pan.clean <- ph2.reporters[,c(1,12,8,24,44:58)]
ph2.panel.clean <- melt(sub.pan.clean, id.vars=c("Subject.ID","location.id","Responsiveness","direct"),
                      measure.vars = c("q1.responses.cleaned","q2.responses.cleaned","q3.responses.cleaned","q4.responses.cleaned","q5.responses.cleaned","q6.responses.cleaned","q7.responses.cleaned","q8.responses.cleaned","q9.responses.cleaned","q10.responses.cleaned","q11.responses.cleaned","q12.responses.cleaned","q13.responses.cleaned","q14.responses.cleaned","q15.responses.cleaned"),
                      variable.name = "question.char", value.name = "response")
ph2.panel$responses.clean <- ph2.panel.clean$response


plug <- read.csv("./ph2_weekly_report_number_sent.csv")
plug$location.id <- tolower(paste(plug$Division,plug$Parish,plug$zone,sep="."))
plug$location.id[!(plug$location.id %in% ph2.panel$location.id)]
#Missing note: makindye.makindye i.kipamba: out-of-sample replacement zone; deviation from sampling protocol, removed from reporter file
#Missing note: rubaga.namirembe.mengo town: out-of-sample replacement zone; deviation from sampling protocol, removed from reporter file
plug$location.id <- ifelse(plug$location.id=="rubaga.mbuya ii.zone 7","nakawa.mbuya ii.zone 7",plug$location.id) #See note above

ph2.panel$zone.plug.actual <- NA
for (i in 1:nrow(ph2.panel)){
  
  if(ph2.panel$week[i]==2 & (ph2.panel$location.id[i] %in% plug$location.id)){
    ph2.panel$zone.plug.actual[i] <- plug[plug$location.id==ph2.panel$location.id[i],"week2"]
  }
  
  if(ph2.panel$week[i]==3 & (ph2.panel$location.id[i] %in% plug$location.id)){
    ph2.panel$zone.plug.actual[i] <- plug[plug$location.id==ph2.panel$location.id[i],"week3"]
  }
  
  if(ph2.panel$week[i]==4 & (ph2.panel$location.id[i] %in% plug$location.id)){
    ph2.panel$zone.plug.actual[i] <- plug[plug$location.id==ph2.panel$location.id[i],"week4"]
  }
  
  if(ph2.panel$week[i]==6 & (ph2.panel$location.id[i] %in% plug$location.id)){
    ph2.panel$zone.plug.actual[i] <- plug[plug$location.id==ph2.panel$location.id[i],"week5"]
  }
} #Number of messages by zone sent prior to weeks 2,3,4,6
ph2.panel <- ph2.panel[,c(1,2,3,4,5,7,8,6,9,10,11)] #Reordering the dataframe


for (i in 1:nrow(ph2.panel)){
  ph2.panel$zone.all[i] <- sum(subset(ph2.panel, week==ph2.panel$week[i]-1 & location.id==ph2.panel$location.id[i])$responses.all, na.rm=TRUE)
  ph2.panel$zone.cleaned[i] <- sum(subset(ph2.panel, week==ph2.panel$week[i]-1 & location.id==ph2.panel$location.id[i])$responses.clean, na.rm=TRUE)
}

cor(ph2.panel$zone.plug.actual,ph2.panel$zone.all, use="pairwise.complete.obs") #0.9881794
cor(ph2.panel$zone.plug.actual,ph2.panel$zone.cleaned, use="pairwise.complete.obs") #0.9535605


##Adding individual level activity previous week
for (i in 1:nrow(ph2.panel)){
  sub <- subset(ph2.panel, week==ph2.panel$week[i]-1 & Subject.ID==ph2.panel$Subject.ID[i] & response==1)
  ph2.panel$active.last.week[i] <- ifelse(nrow(sub)==0,0,1)
}

ph2.panel.elig <- subset(ph2.panel, week> 1 & Responsiveness==1)
ph2.panel.elig$zone.message <- ifelse(ph2.panel.elig$Responsiveness==1 &
                                      (ph2.panel.elig$week==2 | ph2.panel.elig$week==3 | ph2.panel.elig$week==4 | ph2.panel.elig$week==6),
                                      1,0) #Number of messages by zone sent prior to weeks 2,3,4,6

## Model ----
library(lfe) #version 2.8-3

mod <- felm(response ~ zone.all*zone.message + active.last.week | Subject.ID | 0 | location.id, data=ph2.panel.elig)
summary(mod)
```

## Figure F1: Marginal effects of receiving a message about the number zone-wise reports in the previous week, conditional on the number of reports communicated in the message among reporters treated with Responsiveness

```{r fig-social-norm}
## Figure ----
library(interplot) #version 0.2.1
fake <- 0:19
coef1 <- 0.0602413 + fake*(-0.0009483)

lb <- rep(NA, length(fake))
ub <- rep(NA, length(fake))

for (i in 1:length(fake)){
lb[i] <- sort(rnorm(10000, mean=0.0602413, sd=0.0082702) + fake[i]*rnorm(10000, mean=-0.0009483, sd=0.0012884))[250]
ub[i] <- sort(rnorm(10000, mean=0.0602413, sd=0.0082702) + fake[i]*rnorm(10000, mean=-0.0009483, sd=0.0012884))[9750]
}
df_fake <- data.frame(cbind(fake, coef1, lb, ub))
interplot(df_fake) + ylim(-0.025,0.125) + xlab("Number of Reports in Zone (Previous Week)") + ylab("Effect of Message about \nNumber of Reports in Zone")
```

## Figure F2: Treatment effect on main outcomes excluding different windows of responses after the midline call center

```{r ph2-exclude-call}

###Analyzing the reporter-wise reporting data during Phase 2, excluding post-call center ####

# Here we take exactly the above code and source it in a loop to extract parameter estimates for responsiveness excluding observations after the midpoint call center

pre.start <- which(colnames(ph2.reporters)=="q1.responses.cleaned")
pre.end <- which(colnames(ph2.reporters)=="q8.responses.cleaned")
post.start <- which(colnames(ph2.reporters)=="q9.responses.cleaned")
post.end <- which(colnames(ph2.reporters)=="q15.responses.cleaned")

exclude.label <- c("none","Q9","Q9-10","Q9-11","Q9-12","Q9-13","Q9-14","Q9-15")
pool.any.coef <- rep(NA,length(exclude.label))
pool.any.ci.lower <- rep(NA,length(exclude.label))
pool.any.ci.upper <- rep(NA,length(exclude.label))
pool.total.coef <- rep(NA,length(exclude.label))
pool.total.ci.lower <- rep(NA,length(exclude.label))
pool.total.ci.upper <- rep(NA,length(exclude.label))

one.any.coef <- rep(NA,length(exclude.label))
one.any.ci.lower <- rep(NA,length(exclude.label))
one.any.ci.upper <- rep(NA,length(exclude.label))
one.total.coef <- rep(NA,length(exclude.label))
one.total.ci.lower <- rep(NA,length(exclude.label))
one.total.ci.upper <- rep(NA,length(exclude.label))

two.any.coef <- rep(NA,length(exclude.label))
two.any.ci.lower <- rep(NA,length(exclude.label))
two.any.ci.upper <- rep(NA,length(exclude.label))
two.total.coef <- rep(NA,length(exclude.label))
two.total.ci.lower <- rep(NA,length(exclude.label))
two.total.ci.upper <- rep(NA,length(exclude.label))

dta.store <- data.frame(exclude.label,
                        pool.any.coef,pool.any.ci.lower,pool.any.ci.upper,pool.total.coef,pool.total.ci.lower,pool.total.ci.upper,
                        one.any.coef,one.any.ci.lower,one.any.ci.upper,one.total.coef,one.total.ci.lower,one.total.ci.upper,
                        two.any.coef,two.any.ci.lower,two.any.ci.upper,two.total.coef,two.total.ci.lower,two.total.ci.upper)

for (i in 0:7){
  
  if(i %in% 0:6){
    ph2.reporters <- ph2.reporters %>% 
      mutate(total.responses.feed = rowSums(ph2.reporters[,c(pre.start:pre.end,(post.start+i):post.end)])) %>%
      mutate(active.ever.feed = ifelse(total.responses.feed>0,1,0))
  }
  
  if(i == 7){
    ph2.reporters <- ph2.reporters %>% 
      mutate(total.responses.feed = rowSums(ph2.reporters[,c(pre.start:pre.end)])) %>%
      mutate(active.ever.feed = ifelse(total.responses.feed>0,1,0))
  }
  
  source('Phase2_core_feed.R')
  
  dta.store$pool.any.coef[i+1] <- mod.pool.active.clst$coefficients[2]
  dta.store$pool.any.ci.lower[i+1] <- mod.pool.active.clst$conf.low[2]
  dta.store$pool.any.ci.upper[i+1] <- mod.pool.active.clst$conf.high[2]
  dta.store$pool.total.coef[i+1] <- mod.pool.total.clst$coefficients[2]
  dta.store$pool.total.ci.lower[i+1] <- mod.pool.total.clst$conf.low[2]
  dta.store$pool.total.ci.upper[i+1] <- mod.pool.total.clst$conf.high[2]
  
  dta.store$one.any.coef[i+1] <- mod.one.active.clst$coefficients[2]
  dta.store$one.any.ci.lower[i+1] <- mod.one.active.clst$conf.low[2]
  dta.store$one.any.ci.upper[i+1] <- mod.one.active.clst$conf.high[2]
  dta.store$one.total.coef[i+1] <- mod.one.total.clst$coefficients[2]
  dta.store$one.total.ci.lower[i+1] <- mod.one.total.clst$conf.low[2]
  dta.store$one.total.ci.upper[i+1] <- mod.one.total.clst$conf.high[2]
  
  dta.store$two.any.coef[i+1] <- mod.two.active.clst$coefficients[2]
  dta.store$two.any.ci.lower[i+1] <- mod.two.active.clst$conf.low[2]
  dta.store$two.any.ci.upper[i+1] <- mod.two.active.clst$conf.high[2]
  dta.store$two.total.coef[i+1] <- mod.two.total.clst$coefficients[2]
  dta.store$two.total.ci.lower[i+1] <- mod.two.total.clst$conf.low[2]
  dta.store$two.total.ci.upper[i+1] <- mod.two.total.clst$conf.high[2]
  
}

#mod.pooled.clst$ci.lower

#total.r <- rowSums(ph2.reporters[,44:58])
#sum(total.r==ph2.reporters$total.responses) #qXX.responses.cleaned

##Plots

a <- ggplot(dta.store, aes(y=exclude.label, x=pool.any.coef)) +
     geom_point() +
     scale_y_discrete("Exclusions", limits = rev(levels(dta.store$exclude.label))) +
     scale_x_continuous("Treatment Effect (and 95% CI)") +
     geom_errorbarh(aes(xmin=pool.any.ci.lower, xmax=pool.any.ci.upper, height=0.2)) +
     ggtitle("(A) Ever Responding, Pooled") + theme(plot.title = element_text(lineheight=1, face="bold")) +
     geom_vline(xintercept=0, linetype="dotdash")

b <- ggplot(dta.store, aes(y=exclude.label, x=one.any.coef)) +
     geom_point() +
     scale_y_discrete("Exclusions", limits = rev(levels(dta.store$exclude.label))) +
     scale_x_continuous("Treatment Effect (and 95% CI)") +
     geom_errorbarh(aes(xmin=one.any.ci.lower, xmax=one.any.ci.upper, height=0.2)) +
     ggtitle("(B) Ever Responding, Ph1 Recruits") + theme(plot.title = element_text(lineheight=1, face="bold")) +
     geom_vline(xintercept=0, linetype="dotdash")

c <- ggplot(dta.store, aes(y=exclude.label, x=two.any.coef)) +
     geom_point() +
     scale_y_discrete("Exclusions", limits = rev(levels(dta.store$exclude.label))) +
     scale_x_continuous("Treatment Effect (and 95% CI)") +
     geom_errorbarh(aes(xmin=two.any.ci.lower, xmax=two.any.ci.upper, height=0.2)) +
     ggtitle("(C) Ever Responding, Ph2 Recruits") + theme(plot.title = element_text(lineheight=1, face="bold")) +
     geom_vline(xintercept=0, linetype="dotdash")

d <- ggplot(dta.store, aes(y=exclude.label, x=pool.total.coef)) +
     geom_point() +
     scale_y_discrete("Exclusions", limits = rev(levels(dta.store$exclude.label))) +
     scale_x_continuous("Treatment Effect (and 95% CI)") +
     geom_errorbarh(aes(xmin=pool.total.ci.lower, xmax=pool.total.ci.upper, height=0.2)) +
     ggtitle("(D) Total Responses, Pooled") + theme(plot.title = element_text(lineheight=1, face="bold")) +
     geom_vline(xintercept=0, linetype="dotdash")

e <- ggplot(dta.store, aes(y=exclude.label, x=one.total.coef)) +
     geom_point() +
     scale_y_discrete("Exclusions", limits = rev(levels(dta.store$exclude.label))) +
     scale_x_continuous("Treatment Effect (and 95% CI)") +
     geom_errorbarh(aes(xmin=one.total.ci.lower, xmax=one.total.ci.upper, height=0.2)) +
     ggtitle("(E) Total Responses, Ph1 Recruits") + theme(plot.title = element_text(lineheight=1, face="bold")) +
     geom_vline(xintercept=0, linetype="dotdash")

f <- ggplot(dta.store, aes(y=exclude.label, x=two.total.coef)) +
     geom_point() +
     scale_y_discrete("Exclusions", limits = rev(levels(dta.store$exclude.label))) +
     scale_x_continuous("Treatment Effect (and 95% CI)") +
     geom_errorbarh(aes(xmin=two.total.ci.lower, xmax=two.total.ci.upper, height=0.2)) +
     ggtitle("(F) Total Responses, Ph2 Recruits") + theme(plot.title = element_text(lineheight=1, face="bold")) +
     geom_vline(xintercept=0, linetype="dotdash")

#pdf("Interpersonal_Exclusions_190213.pdf",width=12,height=6)
grid.arrange(a,b,c,d,e,f, ncol=3)
#dev.off()
```

## Table F2-4: Recompiled Tables 2-4, with reports arriving after responsiveness messages excluded

```{r response-time}
###Response Delay by Responsiveness Condition ####

#Reimport the mobile phone numbers lost in manual saving of CSV file
ph2.old <- read.csv("./ph2_reports_coded_with_number.csv", comment.char="#", stringsAsFactors=FALSE) #this file has the full mobile phone numbers, which were overwritten in "ph2" w/ mb usability codes
ph2.old <- ph2.old[order(ph2.old$Subject.ID),]
ph2 <- ph2[order(ph2$Subject.ID),]
sum(ph2$Subject.ID==ph2.old$Subject.ID) #6883, total match

ph2$Mobile.Number <- ph2.old$Mobile.Number

full <- read.csv("./ph2_all_reports.csv", stringsAsFactors=FALSE)

full$date <- strptime(full$date, format='%m/%d/%y %H:%M')
full <- subset(full, !is.na(question))

ph2$date <- NA

for (i in 1:nrow(ph2)){
  sub <- subset(full, question==ph2$Question.Number[i] & phone==ph2$Mobile.Number[i] & message==ph2$Original.Response[i])
  ph2$date[i] <- as.character(sub$date[1])

  if(is.na(ph2$date[i])){
    sub <- subset(full, question==ph2$Question.Number[i]+1 & phone==ph2$Mobile.Number[i] & message==ph2$Original.Response[i])
    ph2$date[i] <- as.character(sub$date[1])
  }

    if(is.na(ph2$date[i])){
    sub <- subset(full, question==ph2$Question.Number[i] & phone==ph2$Mobile.Number[i])
    ph2$date[i] <- as.character(sub$date[1])
    }

}

table(is.na(ph2$date),ph2$Data.Usable.mb) #The four obervations that remain are coding errors in data delivered to KCCA

qs <- seq(1:15)
date <- rep(NA,15)
for (i in 1:length(date)){
  sub <- subset(full, question==qs[i])
  date[i] <- as.character(min(sub$date))
}

ph2$min.response <- NA
for (i in 1:15){
  ph2$min.response[ph2$Question.Number==i] <- date[i]
}

resp.window.dta <- read.csv("./ph2_report_response_window.csv", stringsAsFactors = F)

for (i in 1:15){
  ph2$resp.closed[ph2$Question.Number==i] <- resp.window.dta$close[i]
}

ph2$resp.closed <- strptime(ph2$resp.closed, format='%m/%d/%y %H:%M')
ph2$in.time <- ifelse(ph2$date < ph2$resp.closed, 1, 0)
#table(ph2$in.time)

ph2.store <- ph2
ph2 <- subset(ph2, in.time==1)
source("ph2-setup.R") #note: this is an exact copy of the ph2-setup chunk above as of 190516, which re-compiles outcomes variables based on subset

source("tab2-tab3-tab4.R") #note: this is an exact copy of the tab2, tab3, tab4 chunks above as of 190305, which re-compiles the tables based on the subset of in-time responses

```

## Table H1: Total number of active reporters during Phase 2, considering spillover

```{r spillover-setup}
#Subject responses during Phase 2
ph2 <- read.csv("./ph2_reports_coded.csv", comment.char="#", stringsAsFactors=FALSE)
#Note: this file is updated to have final, compiled usability variable "Data.Usable.mb"

#Reimport the mobile phone numbers lost in manual saving of CSV file
ph2.old <- read.csv("./ph2_reports_coded_with_number.csv", comment.char="#", stringsAsFactors=FALSE) #this file has the full mobile phone numbers, which were overwritten in "ph2" w/ mb usability codes
ph2.old <- ph2.old[order(ph2.old$Subject.ID),]
ph2 <- ph2[order(ph2$Subject.ID),]
sum(ph2$Subject.ID==ph2.old$Subject.ID) #6883, total match
ph2$Mobile.Number <- ph2.old$Mobile.Number

source("ph2-setup.R") #note: this is an exact copy of the ph2-setup chunk above as of the date of the file, which re-compiles outcomes variables based on the original "ph2" object. This reverses the subset in the previous chunk.
ph2.reporters.spill <- subset(ph2.reporters, indirect.prob!=0)
```

```{r tab-h1, warnings=F}

#lm_robust, for CIs
mod.pooled <- lm_robust(active.ever ~ exposure + recruitment + lc1.announce + Recruitment.Phase, 
                 data=ph2.reporters.spill, weights = 1/prob.weight, clusters = location.id)
mod.pooled.ci <- cbind(mod.pooled$conf.low,mod.pooled$conf.high)

mod.one <- lm_robust(active.ever ~ exposure + recruitment, 
              data=ph2.reporters.spill[ph2.reporters.spill$Recruitment.Phase==1,],
              weights = 1/prob.weight, clusters = location.id)
mod.one.ci <- cbind(mod.one$conf.low,mod.one$conf.high)

mod.two <- lm_robust(active.ever ~ exposure + recruitment + lc1.announce, 
              data=ph2.reporters.spill[ph2.reporters.spill$Recruitment.Phase==2,],
              weights = 1/prob.weight, clusters = location.id)
mod.two.ci <- cbind(mod.two$conf.low,mod.two$conf.high)

#lm, for stargazer
mod.pooled <- lm(active.ever ~ exposure + recruitment + lc1.announce + Recruitment.Phase, 
                 data=ph2.reporters.spill, weights = 1/prob.weight)
#summary(mod.pooled)

mod.one <- lm(active.ever ~ exposure + recruitment, 
              data=ph2.reporters.spill[ph2.reporters.spill$Recruitment.Phase==1,],
              weights = 1/prob.weight)
#summary(mod.one)

mod.two <- lm(active.ever ~ exposure + recruitment + lc1.announce, 
              data=ph2.reporters.spill[ph2.reporters.spill$Recruitment.Phase==2,],
              weights = 1/prob.weight)
#summary(mod.two)

#Table
stargazer(mod.pooled, mod.one, mod.two, type = "latex", 
          dep.var.caption  = "DV: At Least One Report During Phase 2",
          dep.var.labels.include = FALSE,
          covariate.labels = c("Control, Indirect", "Treated, No Indirect","Treated, Indirect", "Neighbor Nomination","LC1 Nomination","LC1 Announcement","Phase 2","Intercept"),
          notes = "", notes.append = FALSE, notes.label = "",
          df = FALSE, omit.stat = c("rsq","ser","f"),
          intercept.bottom = TRUE,
          ci = TRUE,
          ci.custom = list(mod.pooled.ci,
                           mod.one.ci,
                           mod.two.ci),
          report = c('vcs'), model.numbers = FALSE,
          column.labels = c("(Pooled)","(P1 Recruits)","(P2 Recruits)")
) #Date and time: Mon, Oct 08, 2018 - 12:15:23
```

## Table H2: Total number of reports submitted by each reporter during Phase 2, considering spillover

```{r tab-h2, warnings=F}

#lm_robust, for CIs
mod.pooled <- lm_robust(total.responses ~ exposure + recruitment + lc1.announce + Recruitment.Phase, 
                 data=ph2.reporters.spill, weights = 1/prob.weight, clusters = location.id)
mod.pooled.ci <- cbind(mod.pooled$conf.low,mod.pooled$conf.high)

mod.one <- lm_robust(total.responses ~ exposure + recruitment, 
              data=ph2.reporters.spill[ph2.reporters.spill$Recruitment.Phase==1,],
              weights = 1/prob.weight, clusters = location.id)
mod.one.ci <- cbind(mod.one$conf.low,mod.one$conf.high)

mod.two <- lm_robust(total.responses ~ exposure + recruitment + lc1.announce, 
              data=ph2.reporters.spill[ph2.reporters.spill$Recruitment.Phase==2,],
              weights = 1/prob.weight, clusters = location.id)
mod.two.ci <- cbind(mod.two$conf.low,mod.two$conf.high)

#lm, for stargazer
mod.pooled <- lm(total.responses ~ exposure + recruitment + lc1.announce + Recruitment.Phase, 
                 data=ph2.reporters.spill, weights = 1/prob.weight)
#summary(mod.pooled)

mod.one <- lm(total.responses ~ exposure + recruitment, 
              data=ph2.reporters.spill[ph2.reporters.spill$Recruitment.Phase==1,],
              weights = 1/prob.weight)
#summary(mod.one)

mod.two <- lm(total.responses ~ exposure + recruitment + lc1.announce, 
              data=ph2.reporters.spill[ph2.reporters.spill$Recruitment.Phase==2,],
              weights = 1/prob.weight)
#summary(mod.two)

#Table
stargazer(mod.pooled, mod.one, mod.two, type = "latex", 
          dep.var.caption  = "DV: Total Number of Reports During Phase 2",
          dep.var.labels.include = FALSE,
          covariate.labels = c("Control, Indirect", "Treated, No Indirect","Treated, Indirect", "Neighbor Nomination","LC1 Nomination","LC1 Announcement","Phase 2","Intercept"),
          notes = "", notes.append = FALSE, notes.label = "",
          df = FALSE, omit.stat = c("rsq","ser","f"),
          intercept.bottom = TRUE,
          ci = TRUE,
          ci.custom = list(mod.pooled.ci,
                           mod.one.ci,
                           mod.two.ci),
          report = c('vcs'), model.numbers = FALSE,
          column.labels = c("(Pooled)","(P1 Recruits)","(P2 Recruits)")
) #Date and time: Mon, Oct 08, 2018 - 12:18:27
```

## Table H3: Total number of reports submitted by each reporter during the last two weeks of Phase 2, considering spillover

```{r tab-h3, warnings=F}

#lm_robust, for CIs
mod.pooled <- lm_robust(last2week.responses ~ exposure + recruitment + lc1.announce + Recruitment.Phase, 
                 data=ph2.reporters.spill, weights = 1/prob.weight, clusters = location.id)
mod.pooled.ci <- cbind(mod.pooled$conf.low,mod.pooled$conf.high)

mod.one <- lm_robust(last2week.responses ~ exposure + recruitment, 
              data=ph2.reporters.spill[ph2.reporters.spill$Recruitment.Phase==1,],
              weights = 1/prob.weight, clusters = location.id)
mod.one.ci <- cbind(mod.one$conf.low,mod.one$conf.high)

mod.two <- lm_robust(last2week.responses ~ exposure + recruitment + lc1.announce, 
              data=ph2.reporters.spill[ph2.reporters.spill$Recruitment.Phase==2,],
              weights = 1/prob.weight, clusters = location.id)
mod.two.ci <- cbind(mod.two$conf.low,mod.two$conf.high)

#lm, for stargazer
mod.pooled <- lm(last2week.responses ~ exposure + recruitment + lc1.announce + Recruitment.Phase, 
                 data=ph2.reporters.spill, weights = 1/prob.weight)
#summary(mod.pooled)

mod.one <- lm(last2week.responses ~ exposure + recruitment, 
              data=ph2.reporters.spill[ph2.reporters.spill$Recruitment.Phase==1,],
              weights = 1/prob.weight)
#summary(mod.one)

mod.two <- lm(last2week.responses ~ exposure + recruitment + lc1.announce, 
              data=ph2.reporters.spill[ph2.reporters.spill$Recruitment.Phase==2,],
              weights = 1/prob.weight)
#summary(mod.two)

#Table
stargazer(mod.pooled, mod.one, mod.two, type = "latex", 
          dep.var.caption  = "DV: Total Number of Reports During Last Two Weeks of Phase 2",
          dep.var.labels.include = FALSE,
          covariate.labels = c("Control, Indirect", "Treated, No Indirect","Treated, Indirect", "Neighbor Nomination","LC1 Nomination","LC1 Announcement","Phase 2","Intercept"),
          notes = "", notes.append = FALSE, notes.label = "",
          df = FALSE, omit.stat = c("rsq","ser","f"),
          intercept.bottom = TRUE,
          ci = TRUE,
          ci.custom = list(mod.pooled.ci,
                           mod.one.ci,
                           mod.two.ci),
          report = c('vcs'), model.numbers = FALSE,
          column.labels = c("(Pooled)","(P1 Recruits)","(P2 Recruits)")
) #Date and time: Mon, Oct 08, 2018 - 12:23:32
```


## Figures I1-I2: CONSORT diagrams

```{r consort-holding}

###########CONSORT functions, Phase 2
unique(output$zone.id[output$assignment=="random"]) #random zones (44), Ntinda Village 7 missing
unique(output$zone.id[output$assignment=="nomination"]) #nomination zones (45)
table(output$assignment) #reporters
rpz <- as.data.frame.matrix(table(output$zone.id,output$assignment))

sort(rpz$nomination)
sort(rpz$random)

###########CONSORT functions, Phase 2
ph1.cont <- subset(ph2.reporters, Recruitment.Phase==1) #1021, vs. 1034 in Ph1 analysis
sum(ph1.cont$full_number %in% output$full_number) #1021, all reporters accounted for
sum(duplicated(ph1.cont$full_number)) #No duplicate numbers

ph1.cont.locs <- unique(ph1.cont$location.id)
#Note: "nakawa.mbuya ii.zone i" is two zones for analysis, both assigned to responsiveness condition in Phase 2

#Comparing to assignment
sum(ph1.cont$full_number %in% ph1.reporters$full_number) #1021, all effective sample in assignment file
ph1.excluded <- ph1.reporters[!(ph1.reporters$full_number %in% ph1.cont$full_number),]
table(ph1.excluded$responsiveness)

table(ph1.cont$Responsiveness)

#Sample sizes reported in Tables: 2,866 pooled, Ph1 recruits: 1021, Ph2 recruits: 1845
ph2.reporters$Nom.Ann.Resp <- paste(ph2.reporters$recruitment,ph2.reporters$lc1.announce,ph2.reporters$Responsiveness,sep="_")

table(ph2.reporters$location.id[ph2.reporters$recruitment=="lc1.recruit" & ph2.reporters$lc1.announce==1 & ph2.reporters$Responsiveness==1])
sum(table(ph2.reporters$location.id[ph2.reporters$recruitment=="lc1.recruit" & ph2.reporters$lc1.announce==1 & ph2.reporters$Responsiveness==1])
)

table(ph2.reporters$location.id[ph2.reporters$recruitment=="lc1.recruit" & ph2.reporters$lc1.announce==1 & ph2.reporters$Responsiveness==0])
sum(table(ph2.reporters$location.id[ph2.reporters$recruitment=="lc1.recruit" & ph2.reporters$lc1.announce==1 & ph2.reporters$Responsiveness==0])
)

table(ph2.reporters$location.id[ph2.reporters$recruitment=="lc1.recruit" & ph2.reporters$lc1.announce==0 & ph2.reporters$Responsiveness==1])
sum(table(ph2.reporters$location.id[ph2.reporters$recruitment=="lc1.recruit" & ph2.reporters$lc1.announce==0 & ph2.reporters$Responsiveness==1])
)

table(ph2.reporters$location.id[ph2.reporters$recruitment=="lc1.recruit" & ph2.reporters$lc1.announce==0 & ph2.reporters$Responsiveness==0])
sum(table(ph2.reporters$location.id[ph2.reporters$recruitment=="lc1.recruit" & ph2.reporters$lc1.announce==0 & ph2.reporters$Responsiveness==0])
)

table(ph2.reporters$location.id[ph2.reporters$recruitment=="random" & ph2.reporters$lc1.announce==1 & ph2.reporters$Responsiveness==1])
sum(table(ph2.reporters$location.id[ph2.reporters$recruitment=="random" & ph2.reporters$lc1.announce==1 & ph2.reporters$Responsiveness==1])
)

table(ph2.reporters$location.id[ph2.reporters$recruitment=="random" & ph2.reporters$lc1.announce==1 & ph2.reporters$Responsiveness==0])
sum(table(ph2.reporters$location.id[ph2.reporters$recruitment=="random" & ph2.reporters$lc1.announce==1 & ph2.reporters$Responsiveness==0])
)

table(ph2.reporters$location.id[ph2.reporters$recruitment=="random" & ph2.reporters$lc1.announce==0 & ph2.reporters$Responsiveness==1 & ph2.reporters$Recruitment.Phase==2])
sum(table(ph2.reporters$location.id[ph2.reporters$recruitment=="random" & ph2.reporters$lc1.announce==0 & ph2.reporters$Responsiveness==1 & ph2.reporters$Recruitment.Phase==2])
)

table(ph2.reporters$location.id[ph2.reporters$recruitment=="random" & ph2.reporters$lc1.announce==0 & ph2.reporters$Responsiveness==0 & ph2.reporters$Recruitment.Phase==2])
sum(table(ph2.reporters$location.id[ph2.reporters$recruitment=="random" & ph2.reporters$lc1.announce==0 & ph2.reporters$Responsiveness==0 & ph2.reporters$Recruitment.Phase==2])
)
```
